diff --git a/.bazelrc b/.bazelrc
index 30c138e07a4..9ac5a1bbf40 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -238,6 +238,10 @@ build:linux --copt=-w
 build:macos --copt=-w
 build:windows --copt=/w
 
+# TensorFlow uses M_* math constants that are only defined by MSVC headers if
+# _USE_MATH_DEFINES is defined.
+build:windows --copt=/D_USE_MATH_DEFINES
+
 # Default paths for TF_SYSTEM_LIBS
 build:linux --define=PREFIX=/usr
 build:linux --define=LIBDIR=$(PREFIX)/lib
@@ -258,9 +262,8 @@ build:windows --host_cxxopt=/std:c++14
 # On windows, we still link everything into a single DLL.
 build:windows --config=monolithic
 
-# On linux and macos, we dynamically link small amount of kernels
+# On linux, we dynamically link a small number of kernels
 build:linux --config=dynamic_kernels
-build:macos --config=dynamic_kernels
 
 # Make sure to include as little of windows.h as possible
 build:windows --copt=-DWIN32_LEAN_AND_MEAN
@@ -378,9 +381,9 @@ build:rbe_linux_py3 --python_path="/usr/bin/python3"
 build:rbe_linux_py3 --repo_env=TF_PYTHON_CONFIG_REPO="@org_tensorflow//third_party/toolchains/preconfig/ubuntu16.04/py3"
 
 build:rbe_win --config=rbe
-build:rbe_win --crosstool_top="@org_tensorflow//third_party/toolchains/preconfig/win_1803/bazel_026:toolchain"
+build:rbe_win --crosstool_top="@org_tensorflow//third_party/toolchains/preconfig/win_1803/bazel_121:toolchain"
 build:rbe_win --extra_execution_platforms="@org_tensorflow//third_party/toolchains/preconfig/win_1803:rbe_windows_1803"
-build:rbe_win --extra_toolchains="@org_tensorflow//third_party/toolchains/preconfig/win_1803/bazel_026:cc-toolchain-x64_windows"
+build:rbe_win --extra_toolchains="@org_tensorflow//third_party/toolchains/preconfig/win_1803/bazel_121:cc-toolchain-x64_windows"
 build:rbe_win --host_javabase="@org_tensorflow//third_party/toolchains/preconfig/win_1803:windows_jdk8"
 build:rbe_win --host_platform="@org_tensorflow//third_party/toolchains/preconfig/win_1803:rbe_windows_1803"
 build:rbe_win --javabase="@org_tensorflow//third_party/toolchains/preconfig/win_1803:windows_jdk8"
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 756b7f06eb3..b4dc0e73975 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -72,7 +72,7 @@ TensorFlow coding style.
    [tensorflow/core](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core)
    and
    [tensorflow/python](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/python).
-   TensorFlow has reached version 1 and hence cannot make
+   TensorFlow has passed version 1.0 and hence cannot make
    non-backward-compatible API changes without a major release. Reviewers of
    your pull request will comment on any API compatibility issues.
*   When you contribute a new feature to TensorFlow, the maintenance burden is
diff --git a/README.md b/README.md
index 58775b1d6d9..56baa0740c3 100644
--- a/README.md
+++ b/README.md
@@ -37,18 +37,18 @@ See the [TensorFlow install guide](https://www.tensorflow.org/install) for the
 [Docker container](https://www.tensorflow.org/install/docker), and
 [build from source](https://www.tensorflow.org/install/source).
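As context for the `/D_USE_MATH_DEFINES` copt added to `.bazelrc` above (the same define is removed from `tensorflow/cc/gradients/math_grad.cc` later in this diff, since the build flag now covers it globally), a minimal sketch, not part of the patch, of the MSVC behavior in question:

```
// Minimal sketch: MSVC's <cmath>/<math.h> only define M_PI and the other M_*
// constants when _USE_MATH_DEFINES is set before the header is included.
// With the /D_USE_MATH_DEFINES copt above, sources no longer need the define.
#define _USE_MATH_DEFINES  // redundant under the new copt; shown for clarity
#include <cmath>
#include <cstdio>

int main() {
  std::printf("pi = %f\n", M_PI);  // on MSVC, undeclared without the define
  return 0;
}
```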
-To install the current release for CPU-only: +To install the current release, which includes support for +[CUDA-enabled GPU cards](https://www.tensorflow.org/install/gpu) *(Ubuntu and +Windows)*: ``` $ pip install tensorflow ``` -Use the GPU package for -[CUDA-enabled GPU cards](https://www.tensorflow.org/install/gpu) *(Ubuntu and -Windows)*: +A smaller CPU-only package is also available: ``` -$ pip install tensorflow-gpu +$ pip install tensorflow-cpu ``` To update TensorFlow to the latest version, add `--upgrade` flag to the above @@ -56,7 +56,7 @@ commands. *Nightly binaries are available for testing using the [tf-nightly](https://pypi.python.org/pypi/tf-nightly) and -[tf-nightly-gpu](https://pypi.python.org/pypi/tf-nightly-gpu) packages on PyPi.* +[tf-nightly-cpu](https://pypi.python.org/pypi/tf-nightly-cpu) packages on PyPi.* #### *Try your first TensorFlow program* @@ -150,17 +150,3 @@ Learn more about the ## License [Apache License 2.0](LICENSE) - -## Feature Prioritization Survey - -The TensorFlow team is working on building/improving features, and understands -that it is very important to prioritize these efforts based on what TF users -need. - -The goal of this short, < 5min -[survey](https://google.qualtrics.com/jfe/form/SV_d5nqhCEbkDkQ7ad), is to help -the TensorFlow team better understand what features to prioritize based on your -feedback. Participation is of course optional. - -Take the survey -[HERE](https://google.qualtrics.com/jfe/form/SV_d5nqhCEbkDkQ7ad). diff --git a/configure.py b/configure.py index 93c386240ce..b98cc9fdccc 100644 --- a/configure.py +++ b/configure.py @@ -147,14 +147,16 @@ def write_action_env_to_bazelrc(var_name, var): write_to_bazelrc('build --action_env %s="%s"' % (var_name, str(var))) -def run_shell(cmd, allow_non_zero=False): +def run_shell(cmd, allow_non_zero=False, stderr=None): + if stderr is None: + stderr = sys.stdout if allow_non_zero: try: - output = subprocess.check_output(cmd) + output = subprocess.check_output(cmd, stderr=stderr) except subprocess.CalledProcessError as e: output = e.output else: - output = subprocess.check_output(cmd) + output = subprocess.check_output(cmd, stderr=stderr) return output.decode('UTF-8').strip() @@ -169,10 +171,12 @@ def get_python_path(environ_cp, python_bin_path): if environ_cp.get('PYTHONPATH'): python_paths = environ_cp.get('PYTHONPATH').split(':') try: + stderr = open(os.devnull, 'wb') library_paths = run_shell([ python_bin_path, '-c', 'import site; print("\\n".join(site.getsitepackages()))' - ]).split('\n') + ], + stderr=stderr).split('\n') except subprocess.CalledProcessError: library_paths = [ run_shell([ @@ -1179,10 +1183,17 @@ def system_specific_test_config(env): write_to_bazelrc('test --test_env=LD_LIBRARY_PATH') else: test_and_build_filters.append('-gpu') - write_to_bazelrc('test --test_tag_filters=%s' % + + # Disable tests with "v1only" tag in "v2" Bazel config, but not in "v1" config + write_to_bazelrc('test:v1 --test_tag_filters=%s' % ','.join(test_and_build_filters + test_only_filters)) - write_to_bazelrc('test --build_tag_filters=%s' % + write_to_bazelrc('test:v1 --build_tag_filters=%s' % ','.join(test_and_build_filters)) + write_to_bazelrc( + 'test:v2 --test_tag_filters=%s' % + ','.join(test_and_build_filters + test_only_filters + ['-v1only'])) + write_to_bazelrc('test:v2 --build_tag_filters=%s' % + ','.join(test_and_build_filters + ['-v1only'])) def set_system_libs_flag(environ_cp): diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 081edb21ae1..d8a681c3999 100644 --- 
a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -860,7 +860,7 @@ gen_api_init_files( output_files = TENSORFLOW_API_INIT_FILES_V1, output_package = "tensorflow._api.v1", root_file_name = "v1.py", - root_init_template = "api_template_v1.__init__.py", + root_init_template = "$(location api_template_v1.__init__.py)", ) gen_api_init_files( @@ -883,7 +883,7 @@ gen_api_init_files( output_files = TENSORFLOW_API_INIT_FILES_V2, output_package = "tensorflow._api.v2", root_file_name = "v2.py", - root_init_template = "api_template.__init__.py", + root_init_template = "$(location api_template.__init__.py)", ) py_library( diff --git a/tensorflow/api_template.__init__.py b/tensorflow/api_template.__init__.py index c515cc76b9a..a8cd6d1782c 100644 --- a/tensorflow/api_template.__init__.py +++ b/tensorflow/api_template.__init__.py @@ -89,6 +89,7 @@ except ImportError: # Enable TF2 behaviors from tensorflow.python.compat import v2_compat as _compat # pylint: disable=g-import-not-at-top _compat.enable_v2_behavior() +_major_api_version = 2 # Load all plugin libraries from site-packages/tensorflow-plugins if we are @@ -119,8 +120,14 @@ def _running_from_pip_package(): _current_file_location.startswith(dir_) for dir_ in _site_packages_dirs) if _running_from_pip_package(): + # TODO(gunan): Add sanity checks to loaded modules here. for _s in _site_packages_dirs: - # TODO(gunan): Add sanity checks to loaded modules here. + # Load first party dynamic kernels. + _main_dir = _os.path.join(_s, 'tensorflow_core/core/kernels') + if _fi.file_exists(_main_dir): + _ll.load_library(_main_dir) + + # Load third party dynamic kernels. _plugin_dir = _os.path.join(_s, 'tensorflow-plugins') if _fi.file_exists(_plugin_dir): _ll.load_library(_plugin_dir) diff --git a/tensorflow/api_template_v1.__init__.py b/tensorflow/api_template_v1.__init__.py index 2b2899c3fe0..b6b5e36f0d5 100644 --- a/tensorflow/api_template_v1.__init__.py +++ b/tensorflow/api_template_v1.__init__.py @@ -104,6 +104,8 @@ from tensorflow.python.platform import flags # pylint: disable=g-import-not-at- _current_module.app.flags = flags # pylint: disable=undefined-variable setattr(_current_module, "flags", flags) +_major_api_version = 1 + # Load all plugin libraries from site-packages/tensorflow-plugins if we are # running under pip. # TODO(gunan): Enable setting an environment variable to define arbitrary plugin @@ -132,8 +134,14 @@ def _running_from_pip_package(): _current_file_location.startswith(dir_) for dir_ in _site_packages_dirs) if _running_from_pip_package(): + # TODO(gunan): Add sanity checks to loaded modules here. for _s in _site_packages_dirs: - # TODO(gunan): Add sanity checks to loaded modules here. + # Load first party dynamic kernels. + _main_dir = _os.path.join(_s, 'tensorflow_core/core/kernels') + if _fi.file_exists(_main_dir): + _ll.load_library(_main_dir) + + # Load third party dynamic kernels. 
_plugin_dir = _os.path.join(_s, 'tensorflow-plugins') if _fi.file_exists(_plugin_dir): _ll.load_library(_plugin_dir) diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD index efe01f7e049..76a02090c3b 100644 --- a/tensorflow/c/BUILD +++ b/tensorflow/c/BUILD @@ -53,6 +53,20 @@ filegroup( visibility = ["//visibility:public"], ) +filegroup( + name = "pywrap_eager_hdrs", + srcs = [ + "c_api_internal.h", + "tf_status_helper.h", + "tf_status_internal.h", + "tf_tensor_internal.h", + ], + visibility = [ + "//tensorflow/core:__pkg__", + "//tensorflow/python:__pkg__", + ], +) + tf_cuda_library( name = "c_api_internal", hdrs = [ diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index 130e9a0c3c7..92e994183a2 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -88,6 +88,18 @@ tf_cuda_library( alwayslink = 1, ) +filegroup( + name = "pywrap_eager_hdrs", + srcs = [ + "c_api_experimental.h", + "c_api_internal.h", + ], + visibility = [ + "//tensorflow/core:__pkg__", + "//tensorflow/python:__pkg__", + ], +) + tf_cuda_library( name = "c_api_internal", srcs = ["c_api_experimental.h"], diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 66a2a4aaa3c..c1aa187876f 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -464,7 +464,7 @@ tensorflow::Status UpdateTFE_ContextWithServerDef( &new_remote_device_mgr)); remote_device_mgr = new_remote_device_mgr.get(); } else { - ctx->context->ClearCaches(); + ctx->context->ClearCachesAndDefaultExecutor(); // TODO(b/143914772): Potential memory leak if rendezvous has pending // tensors for removed / replaced workers. @@ -754,7 +754,9 @@ TF_DeviceList* TFE_ContextListDevices(TFE_Context* ctx, TF_Status* status) { return list; } -void TFE_ContextClearCaches(TFE_Context* ctx) { ctx->context->ClearCaches(); } +void TFE_ContextClearCaches(TFE_Context* ctx) { + ctx->context->ClearCachesAndThreadExecutors(); +} // Set server_def on the context, possibly updating it. TF_CAPI_EXPORT extern void TFE_ContextSetServerDef(TFE_Context* ctx, diff --git a/tensorflow/c/eager/c_api_internal.cc b/tensorflow/c/eager/c_api_internal.cc index f6092715e17..4f3de479ba7 100644 --- a/tensorflow/c/eager/c_api_internal.cc +++ b/tensorflow/c/eager/c_api_internal.cc @@ -14,6 +14,7 @@ limitations under the License. 
==============================================================================*/
 #include "tensorflow/c/eager/c_api_internal.h"
 
+#include "tensorflow/core/platform/errors.h"
 #include "tensorflow/core/platform/host_info.h"
 
 TFE_Op* NewOrResetOp(TFE_Context* ctx, const char* op_or_function_name,
@@ -26,29 +27,22 @@ TFE_Op* NewOrResetOp(TFE_Context* ctx, const char* op_or_function_name,
   if (!status->status.ok()) {
     return nullptr;
   }
-  auto create_or_reset =
-      [&op_to_reset, &ctx, &name, &types, &raw_device_name, &status](
-          bool is_function, TFE_OpInferenceContext* inference_ctx) -> TFE_Op* {
-    if (op_to_reset) {
-      status->status = op_to_reset->Reset(ctx, name, is_function, types,
-                                          raw_device_name, inference_ctx);
-      return op_to_reset;
-    } else {
-      TFE_Op* new_op = new TFE_Op(ctx, name, is_function, types, inference_ctx);
-      status->status = new_op->operation.SetDeviceName(raw_device_name);
-      return new_op;
-    }
-  };
+  if (op_to_reset && op_to_reset->ctx != ctx) {
+    status->status = tensorflow::errors::Internal(
+        "Cannot reset a TFE_Op from another TFE_Context");
+    return nullptr;
+  }
+
+  std::unique_ptr<TFE_OpInferenceContext> inference_ctx;
   if (!is_function) {
     const tensorflow::OpDef* op_def;
     status->status = tensorflow::OpDefForOp(op_or_function_name, &op_def);
     if (!status->status.ok()) {
       return nullptr;
     }
-    return create_or_reset(false, new TFE_OpInferenceContext(op_def));
-  }
-  if (!ctx->context->FindFunctionByName(name)) {
+    inference_ctx.reset(new TFE_OpInferenceContext(op_def));
+  } else if (!ctx->context->FindFunctionByName(name)) {
     status->status = tensorflow::errors::NotFound(
         "'", name,
         "' is neither a type of a primitive operation nor a name "
@@ -58,5 +52,15 @@ TFE_Op* NewOrResetOp(TFE_Context* ctx, const char* op_or_function_name,
         "registered in the binary running in this process.");
     return nullptr;
   }
-  return create_or_reset(true, nullptr);
+
+  if (op_to_reset) {
+    status->status = op_to_reset->Reset(
+        name, is_function, types, raw_device_name, std::move(inference_ctx));
+    return op_to_reset;
+  }
+
+  TFE_Op* new_op =
+      new TFE_Op(ctx, name, is_function, types, std::move(inference_ctx));
+  status->status = new_op->operation.SetDeviceName(raw_device_name);
+  return new_op;
 }
diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h
index 29106e2998d..df192913b72 100644
--- a/tensorflow/c/eager/c_api_internal.h
+++ b/tensorflow/c/eager/c_api_internal.h
@@ -125,24 +125,26 @@ struct TFE_OpInferenceContext {
 struct TFE_Op {
   TFE_Op(TFE_Context* ctx, const char* op, bool is_function,
          const tensorflow::AttrTypeMap* t,
-         TFE_OpInferenceContext* inference_ctx)
-      : operation(ctx->context, op, is_function, t),
-        inference_ctx(inference_ctx) {}
+         std::unique_ptr<TFE_OpInferenceContext> inference_ctx)
+      : ctx(ctx),
+        operation(ctx->context, op, is_function, t),
+        inference_ctx(std::move(inference_ctx)) {}
 
   void Clear() {
     operation.Clear();
     inference_ctx.reset();
   }
 
-  tensorflow::Status Reset(TFE_Context* ctx, const char* op, bool is_function,
+  tensorflow::Status Reset(const char* op, bool is_function,
                            const tensorflow::AttrTypeMap* t,
                            const char* raw_device_name,
-                           TFE_OpInferenceContext* infer_ctx) {
-    inference_ctx.reset(infer_ctx);
+                           std::unique_ptr<TFE_OpInferenceContext> infer_ctx) {
+    inference_ctx = std::move(infer_ctx);
     return operation.Reset(ctx->context, op, is_function, t, raw_device_name,
                            nullptr);
   }
 
+  TFE_Context* ctx;
   tensorflow::EagerOperation operation;
   std::unique_ptr<TFE_OpInferenceContext> inference_ctx;
 };
diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD
index 70dadf79dbe..a9f429b8bd3 100644
--- a/tensorflow/cc/BUILD
+++ b/tensorflow/cc/BUILD
@@ -233,6 +233,7 @@ cc_library_with_android_deps(
     deps = [
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:lib",
+        "//tensorflow/core:lib_experimental",
         "//tensorflow/core:protos_all_cc",
     ],
 )
diff --git a/tensorflow/cc/client/client_session.cc b/tensorflow/cc/client/client_session.cc
index 97038b2ba86..c4add1589e7 100644
--- a/tensorflow/cc/client/client_session.cc
+++ b/tensorflow/cc/client/client_session.cc
@@ -127,6 +127,33 @@ Status ClientSession::Run(const RunOptions& run_options, const FeedType& inputs,
                           target_node_names, outputs, run_metadata);
 }
 
+Status ClientSession::Run(
+    const RunOptions& run_options, const FeedType& inputs,
+    const std::vector<Output>& fetch_outputs,
+    const std::vector<Operation>& run_outputs, std::vector<Tensor>* outputs,
+    RunMetadata* run_metadata,
+    const thread::ThreadPoolOptions& threadpool_options) const {
+  std::vector<std::pair<string, Tensor>> feeds;
+  for (auto const& feed : inputs) {
+    TF_RETURN_IF_ERROR(feed.second.status);
+    feeds.emplace_back(feed.first.name(), feed.second.tensor);
+  }
+  std::vector<string> output_tensor_names;
+  output_tensor_names.reserve(fetch_outputs.size());
+  for (auto const& output : fetch_outputs) {
+    output_tensor_names.push_back(output.name());
+  }
+  std::vector<string> target_node_names;
+  target_node_names.reserve(run_outputs.size());
+  for (auto const& output : run_outputs) {
+    target_node_names.push_back(output.node()->name());
+  }
+  TF_RETURN_IF_ERROR(impl()->MaybeExtendGraph());
+  return impl()->session_->Run(run_options, feeds, output_tensor_names,
+                               target_node_names, outputs, run_metadata,
+                               threadpool_options);
+}
+
 Status ClientSession::MakeCallable(const CallableOptions& callable_options,
                                    CallableHandle* out_handle) {
   TF_RETURN_IF_ERROR(impl()->MaybeExtendGraph());
diff --git a/tensorflow/cc/client/client_session.h b/tensorflow/cc/client/client_session.h
index b0bb6c3fa6d..3765eaec9bf 100644
--- a/tensorflow/cc/client/client_session.h
+++ b/tensorflow/cc/client/client_session.h
@@ -93,6 +93,14 @@ class ClientSession {
              const std::vector<Operation>& run_outputs,
              std::vector<Tensor>* outputs, RunMetadata* run_metadata) const;
 
+  /// Same as above. Additionally allows user to provide custom threadpool
+  /// implementation via ThreadPoolOptions.
+  Status Run(const RunOptions& run_options, const FeedType& inputs,
+             const std::vector<Output>& fetch_outputs,
+             const std::vector<Operation>& run_outputs,
+             std::vector<Tensor>* outputs, RunMetadata* run_metadata,
+             const thread::ThreadPoolOptions& threadpool_options) const;
+
   /// \brief A handle to a subgraph, created with
   /// `ClientSession::MakeCallable()`.
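The new `ClientSession::Run` overload declared above threads a `thread::ThreadPoolOptions` through to `Session::Run`. A minimal usage sketch, not part of the patch; `RunWithCustomPools` is an illustrative name, and null pool pointers fall back to the default pools:

```
// Illustrative sketch of calling the new overload; not part of the patch.
#include <vector>

#include "tensorflow/cc/client/client_session.h"
#include "tensorflow/cc/ops/standard_ops.h"
#include "tensorflow/core/platform/threadpool_options.h"

tensorflow::Status RunWithCustomPools(
    tensorflow::thread::ThreadPoolInterface* inter_op,
    tensorflow::thread::ThreadPoolInterface* intra_op) {
  using namespace tensorflow;       // Scope, ClientSession, Tensor, ...
  using namespace tensorflow::ops;  // Add

  Scope root = Scope::NewRootScope();
  auto sum = Add(root, {1, 2}, {3, 4});
  ClientSession session(root);

  thread::ThreadPoolOptions pool_opts;
  pool_opts.inter_op_threadpool = inter_op;  // nullptr -> default pool
  pool_opts.intra_op_threadpool = intra_op;

  std::vector<Tensor> outputs;
  // The overload added in this diff forwards pool_opts to Session::Run.
  return session.Run(RunOptions(), ClientSession::FeedType{}, {sum},
                     /*run_outputs=*/{}, &outputs, /*run_metadata=*/nullptr,
                     pool_opts);
}
```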
   typedef int64 CallableHandle;
diff --git a/tensorflow/cc/client/client_session_test.cc b/tensorflow/cc/client/client_session_test.cc
index 3d1c38483c4..27ec4c0871d 100644
--- a/tensorflow/cc/client/client_session_test.cc
+++ b/tensorflow/cc/client/client_session_test.cc
@@ -112,7 +112,7 @@ TEST(ClientSessionTest, Extend) {
   test::ExpectTensorEqual<int>(outputs[0], test::AsTensor<int>({31, 42}, {2}));
 }
 
-TEST(ClientSessionTest, MultiThreaded) {
+TEST(ClientSessionTest, MultiThreadedWithDefaultThreadpool) {
   Scope root = Scope::NewRootScope();
   auto a = Add(root, {1, 2}, {3, 4});
   auto b = Mul(root, {1, 2}, {3, 4});
@@ -138,6 +138,49 @@
   test::ExpectTensorEqual<int>(outputs[0], test::AsTensor<int>({-1, 2}, {2}));
 }
 
+TEST(ClientSessionTest, MultiThreadedWithCustomThreadpool) {
+  Scope root = Scope::NewRootScope();
+  int num_threads = 3;
+  auto a = Add(root, {1, 2}, {3, 4});
+  auto b = Mul(root, {1, 2}, {3, 4});
+  ClientSession session(root);
+
+  auto inter_op_threadpool =
+      absl::make_unique<CustomThreadPoolImpl>(num_threads);
+  ASSERT_EQ(inter_op_threadpool->GetNumScheduleCalled(), 0);
+
+  auto intra_op_threadpool =
+      absl::make_unique<CustomThreadPoolImpl>(num_threads);
+  ASSERT_EQ(intra_op_threadpool->GetNumScheduleCalled(), 0);
+
+  tensorflow::thread::ThreadPoolOptions threadPoolOptions;
+  threadPoolOptions.inter_op_threadpool = inter_op_threadpool.get();
+  threadPoolOptions.intra_op_threadpool = intra_op_threadpool.get();
+
+  {
+    thread::ThreadPool thread_pool(Env::Default(), "pool", 2);
+    thread_pool.Schedule([&session, a]() {
+      std::vector<Tensor> outputs;
+      TF_EXPECT_OK(session.Run(RunOptions(), ClientSession::FeedType{}, {a}, {},
+                               &outputs, nullptr, thread::ThreadPoolOptions()));
+      test::ExpectTensorEqual<int>(outputs[0],
+                                   test::AsTensor<int>({4, 6}, {2}));
+    });
+    thread_pool.Schedule([&session, b]() {
+      std::vector<Tensor> outputs;
+      TF_EXPECT_OK(session.Run(RunOptions(), ClientSession::FeedType{}, {b}, {},
+                               &outputs, nullptr, thread::ThreadPoolOptions()));
+      test::ExpectTensorEqual<int>(outputs[0],
+                                   test::AsTensor<int>({3, 8}, {2}));
+    });
+  }
+  auto c = Sub(root, b, a);
+  std::vector<Tensor> outputs;
+  TF_EXPECT_OK(session.Run(RunOptions(), ClientSession::FeedType{}, {c}, {},
+                           &outputs, nullptr, thread::ThreadPoolOptions()));
+  test::ExpectTensorEqual<int>(outputs[0], test::AsTensor<int>({-1, 2}, {2}));
+}
+
 TEST(ClientSessionTest, CallableWithDefaultThreadPool) {
   Scope root = Scope::NewRootScope();
   auto a = Placeholder(root, DT_INT32);
diff --git a/tensorflow/cc/gradients/math_grad.cc b/tensorflow/cc/gradients/math_grad.cc
index b3c1e6a913a..f67c6f91d6c 100644
--- a/tensorflow/cc/gradients/math_grad.cc
+++ b/tensorflow/cc/gradients/math_grad.cc
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License.
==============================================================================*/ -#define _USE_MATH_DEFINES #include #include "tensorflow/cc/ops/array_ops_internal.h" diff --git a/tensorflow/cc/saved_model/BUILD b/tensorflow/cc/saved_model/BUILD index 71709e40b36..b64f0f55417 100644 --- a/tensorflow/cc/saved_model/BUILD +++ b/tensorflow/cc/saved_model/BUILD @@ -125,11 +125,11 @@ cc_library( deps = [ ":constants", "@com_google_absl//absl/container:flat_hash_set", - "//tensorflow/core/platform:strcat", - "//tensorflow/core/util/tensor_bundle", ] + if_not_mobile([ "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", + "//tensorflow/core/platform:strcat", + "//tensorflow/core/util/tensor_bundle", ]), ) diff --git a/tensorflow/compiler/aot/BUILD b/tensorflow/compiler/aot/BUILD index b6e260f00a5..a17ad6d27a9 100644 --- a/tensorflow/compiler/aot/BUILD +++ b/tensorflow/compiler/aot/BUILD @@ -75,8 +75,8 @@ tf_cc_test( "//tensorflow/core:test", "//tensorflow/core:test_main", "@com_google_absl//absl/strings", - "@llvm//:support", # fixdeps: keep - "@llvm//:x86_code_gen", # fixdeps: keep + "@llvm-project//llvm:support", # fixdeps: keep + "@llvm-project//llvm:x86_code_gen", # fixdeps: keep ], ) @@ -104,11 +104,11 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "@com_google_absl//absl/strings", - "@llvm//:aarch64_code_gen", # fixdeps: keep - "@llvm//:arm_code_gen", # fixdeps: keep - "@llvm//:powerpc_code_gen", # fixdeps: keep - "@llvm//:target", - "@llvm//:x86_code_gen", # fixdeps: keep + "@llvm-project//llvm:aarch64_code_gen", # fixdeps: keep + "@llvm-project//llvm:arm_code_gen", # fixdeps: keep + "@llvm-project//llvm:powerpc_code_gen", # fixdeps: keep + "@llvm-project//llvm:target", + "@llvm-project//llvm:x86_code_gen", # fixdeps: keep ], ) @@ -205,9 +205,9 @@ cc_library( "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:span", - "@llvm//:core", - "@llvm//:support", - "@llvm//:target", + "@llvm-project//llvm:core", + "@llvm-project//llvm:support", + "@llvm-project//llvm:target", ], ) diff --git a/tensorflow/compiler/aot/tfcompile.bzl b/tensorflow/compiler/aot/tfcompile.bzl index fb81266a048..c8bbb1a473c 100644 --- a/tensorflow/compiler/aot/tfcompile.bzl +++ b/tensorflow/compiler/aot/tfcompile.bzl @@ -407,6 +407,7 @@ def target_llvm_triple(): "//tensorflow:android_arm64": "aarch64-none-android", "//tensorflow:android_x86": "i686-none-android", "//tensorflow:ios": "arm64-none-ios", + "//tensorflow:ios_x86_64": "x86_64-apple-ios", "//tensorflow:linux_ppc64le": "ppc64le-ibm-linux-gnu", "//tensorflow:macos": "x86_64-none-darwin", "//conditions:default": "x86_64-pc-linux", diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index 4526090d68a..15e53b7be67 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -500,6 +500,7 @@ cc_library( "//tensorflow/core:framework", "//tensorflow/core:graph", "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", ], ) diff --git a/tensorflow/compiler/jit/build_xla_ops_pass.h b/tensorflow/compiler/jit/build_xla_ops_pass.h index 58f7c4b3a0d..8487802aa66 100644 --- a/tensorflow/compiler/jit/build_xla_ops_pass.h +++ b/tensorflow/compiler/jit/build_xla_ops_pass.h @@ -22,8 +22,9 @@ limitations under the License. namespace tensorflow { -// Adds _XlaCompile and _XlaRun operations to the TF graph that compiles and -// executes (using XLA) TF function calls marked with "_XlaCompiledKernel". 
+// Replaces TF function calls marked with `_XlaCompiledKernel` with _XlaCompile +// and _XlaRun nodes (which compile and launch, respectively, the corresponding +// HLO module). class BuildXlaOpsPass : public GraphOptimizationPass { public: // If enable_lazy_compilation is not nullopt then *enable_lazy_compilation diff --git a/tensorflow/compiler/jit/defs.cc b/tensorflow/compiler/jit/defs.cc index b23f6ec35f5..4bea71e8fc1 100644 --- a/tensorflow/compiler/jit/defs.cc +++ b/tensorflow/compiler/jit/defs.cc @@ -17,6 +17,8 @@ limitations under the License. namespace tensorflow { +const char* const kXlaMustCompileAttr = "_XlaMustCompile"; + const char* const kXlaCompileAttr = "_XlaCompile"; // User-provided through jit_scope APIs. Effective only when auto_jit is OFF. diff --git a/tensorflow/compiler/jit/defs.h b/tensorflow/compiler/jit/defs.h index bf8009344df..9eb4c2ca2e8 100644 --- a/tensorflow/compiler/jit/defs.h +++ b/tensorflow/compiler/jit/defs.h @@ -22,7 +22,16 @@ limitations under the License. namespace tensorflow { // Name of attribute used to tag operators for compilation with XLA + +// Implies must-compile semantics: either it will be compiled +// with XLA, or an error will be thrown. +extern const char* const kXlaMustCompileAttr; // "_XlaMustCompile" + +// Implies auto-clustering: tagged nodes will be clustered and compiled with XLA +// on a best-effort basis. extern const char* const kXlaCompileAttr; // "_XlaCompile" + +// Implies auto-clustering within the given scope. extern const char* const kXlaScopeAttr; // "_XlaScope" extern const char* const kXlaInternalScopeAttr; // "_XlaInternalScope" diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.h b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.h index 8b627cd959a..bf8b2c41e0e 100644 --- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.h +++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.h @@ -27,6 +27,15 @@ limitations under the License. namespace tensorflow { +// EncapsulateSubgraphs pass takes all the nodes with the same cluster ID +// (derived from kXlaClusterAttr=ID (kXlaClusterAttr) attribute), puts them into +// a TF function, and replaces the subgraph in the main graph with a call to +// that TF function annotated with kXlaCompiledKernelAttr (_XlaCompiledKernel). +class EncapsulateSubgraphsPass : public GraphOptimizationPass { + public: + Status Run(const GraphOptimizationPassOptions& options) override; +}; + // A rewriting function to apply to each subgraph during encapsulation. // 'arg_source_tensors' are the tensors corresponding to the arguments in the // original source graph (*not* 'graph'). @@ -100,11 +109,6 @@ extern const char* const kXlaHasReferenceVarsAttr; // TODO(hpucha): Move the utilities to a more appropriate place. 
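Given the `kXlaMustCompileAttr` / `kXlaCompileAttr` split documented above (must-compile vs. best-effort auto-clustering), a small sketch of what a must-compile check over a `NodeDef` looks like; it mirrors the `CanCreateXlaKernel` simplification later in this diff, and `MustCompile` is an illustrative name:

```
// Sketch of a must-compile check; mirrors CanCreateXlaKernel() as rewritten
// later in this diff. MustCompile is an illustrative name, not TF API.
#include "tensorflow/compiler/jit/defs.h"
#include "tensorflow/core/framework/node_def.pb.h"

bool MustCompile(const tensorflow::NodeDef& node_def) {
  // _XlaMustCompile=true means compile-or-error; absence (or false) leaves
  // the node to best-effort auto-clustering via _XlaCompile/_XlaScope.
  const auto it = node_def.attr().find(tensorflow::kXlaMustCompileAttr);
  return it != node_def.attr().end() && it->second.b();
}
```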
void SortControlInputs(GraphDef* gdef); -class EncapsulateSubgraphsPass : public GraphOptimizationPass { - public: - Status Run(const GraphOptimizationPassOptions& options) override; -}; - } // namespace tensorflow #endif // TENSORFLOW_COMPILER_JIT_ENCAPSULATE_SUBGRAPHS_PASS_H_ diff --git a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.h b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.h index 99e9dfd598f..3057e4c7469 100644 --- a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.h +++ b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.h @@ -28,7 +28,7 @@ #include "tensorflow/core/graph/graph.h" #include "tensorflow/core/platform/env.h" - namespace tensorflow { +namespace tensorflow { // Encapsulates nodes marked with the _xla_compile_id attribute into // XlaLaunch operators. diff --git a/tensorflow/compiler/jit/extract_outside_compilation_pass.cc b/tensorflow/compiler/jit/extract_outside_compilation_pass.cc index 90fa15bc29b..277c8dbc594 100644 --- a/tensorflow/compiler/jit/extract_outside_compilation_pass.cc +++ b/tensorflow/compiler/jit/extract_outside_compilation_pass.cc @@ -2130,6 +2130,53 @@ Status ExtractOutsideCompilationForNodesWithAssociatedFunctions( return Status::OK(); } +Status CopyOutsideCompilationConstNodes( + Graph* g, const string& outside_compilation_attr_name) { + for (Node* n : g->op_nodes()) { + if (!n->IsConstant() || + !HasNodeAttr(n->def(), outside_compilation_attr_name)) { + continue; + } + + std::vector out_edges(n->out_edges().begin(), + n->out_edges().end()); + bool has_non_oc_output = false; + for (const Edge* e : out_edges) { + if (!e->IsControlEdge() && + !HasNodeAttr(e->dst()->def(), outside_compilation_attr_name)) { + has_non_oc_output = true; + break; + } + } + if (!has_non_oc_output) { + continue; + } + + NodeDef copy_def = n->def(); + copy_def.set_name(g->NewName(n->name())); + copy_def.mutable_attr()->erase(outside_compilation_attr_name); + Status s; + Node* copy_node = g->AddNode(copy_def, &s); + TF_RETURN_IF_ERROR(s); + for (const Edge* e : n->in_edges()) { + if (e->IsControlEdge()) { + g->AddControlEdge(e->src(), copy_node); + } + } + for (const Edge* e : out_edges) { + if (!e->IsControlEdge() && + !HasNodeAttr(e->dst()->def(), outside_compilation_attr_name)) { + Node* dst = e->dst(); + int dst_input = e->dst_input(); + g->RemoveEdge(e); + g->AddEdge(copy_node, 0, dst, dst_input); + } + } + } + + return Status::OK(); +} + } // namespace Status RewriteOutsideCompilationSubgraphFn::operator()( @@ -2279,6 +2326,10 @@ Status ExtractOutsideCompilationForFunction( std::vector outside_compilation_host_graphs; std::vector shape_inference_graphs_to_rewrite; if (*has_outside_compilation) { + // Copy outside compilation Const nodes with non outside compilation users. + TF_RETURN_IF_ERROR(CopyOutsideCompilationConstNodes( + fbody->graph, outside_compilation_attr_name)); + // Find dependencies between outside compilation clusters. 
     TF_ASSIGN_OR_RETURN(auto cluster_deps,
                         OutsideCompilationClusterDependencies(
diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass.cc b/tensorflow/compiler/jit/mark_for_compilation_pass.cc
index a5fadc08094..edcec281802 100644
--- a/tensorflow/compiler/jit/mark_for_compilation_pass.cc
+++ b/tensorflow/compiler/jit/mark_for_compilation_pass.cc
@@ -1187,7 +1187,7 @@ Status MarkForCompilationPassImpl::FindCompilationCandidates() {
     }
 
     if (!whitelist.empty() && !whitelist.contains(node->def().op())) {
-      VLOG(1) << "Rejecting " << node->name()
+      VLOG(1) << "Rejecting TF operation " << node->def().op()
              << " as it is not listed in --tf_xla_ops_to_cluster.";
       continue;
     }
@@ -1770,9 +1770,10 @@ absl::flat_hash_map<string, std::vector<string>>* GetWhitelistTable() {
           {"ComplexAbs", "Angle", "Conj", "Abs", "Acos", "Acosh", "Asin",
            "Atan", "Atanh", "Ceil", "Cos", "Cosh", "Sin", "Exp", "Expm1",
            "Floor", "IsFinite", "IsInf", "IsNan", "Inv", "Reciprocal", "Log",
-           "Log1p", "Invert", "LogicalNot", "Neg", "Rint", "Round", "Rsqrt",
-           "Sigmoid", "Sign", "Sinh", "Softplus", "Softsign", "Sqrt", "Square",
-           "Tan", "Tanh", "Real", "Imag", "Erf", "Erfc", "Lgamma", "Digamma",
+           "Log1p", "Invert", "LogicalNot", "Ndtri", "Neg", "Rint", "Round",
+           "Rsqrt", "Sigmoid", "Sign", "Sinh", "Softplus", "Softsign", "Sqrt",
+           "Square", "Tan", "Tanh", "Real", "Imag", "Erf", "Erfc", "Erfinv",
+           "Lgamma", "Digamma",
            // Binary
            "Add", "AddV2", "Sub", "Mul", "Div", "Atan2", "Complex", "DivNoNan",
            "MulNoNan", "FloorDiv", "Xlogy", "Xdivy", "FloorMod", "BitwiseAnd",
@@ -2035,6 +2036,7 @@ absl::flat_hash_set<string> GetKnownXLAWhitelistOp() {
       "XlaDynamicSlice",
       "XlaDynamicUpdateSlice",
       "XlaEinsum",
+      "XlaGather",
       "XlaIf",
       "XlaKeyValueSort",
       "XlaPad",
@@ -2042,6 +2044,7 @@ absl::flat_hash_set<string> GetKnownXLAWhitelistOp() {
       "XlaReduce",
       "XlaReduceWindow",
       "XlaReplicaId",
+      "XlaScatter",
       "XlaSelectAndScatter",
       "XlaSelfAdjointEig",
       "XlaSend",
diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass.h b/tensorflow/compiler/jit/mark_for_compilation_pass.h
index 0c9b40776f5..8b660710898 100644
--- a/tensorflow/compiler/jit/mark_for_compilation_pass.h
+++ b/tensorflow/compiler/jit/mark_for_compilation_pass.h
@@ -34,8 +34,9 @@ extern const char* const kXlaClusterAttr;
 // compilation by the encapsulate subgraphs pass.
 extern const char* const kXlaOutsideCompilationAttr;
 
-// Pass that marks a subset of operators in the graph with attribute
-// _XlaCluster so they are compiled by the EncapsulateSubgraphsPass.
+// Marks a subset of nodes in the graph which are to be clustered
+// with an attribute _XlaCluster=<cluster id> so they are picked up by the
+// EncapsulateSubgraphsPass.
 class MarkForCompilationPass : public GraphOptimizationPass {
  public:
   MarkForCompilationPass() = default;
diff --git a/tensorflow/compiler/jit/shape_inference.cc b/tensorflow/compiler/jit/shape_inference.cc
index 2ed085d021f..72804ff57e4 100644
--- a/tensorflow/compiler/jit/shape_inference.cc
+++ b/tensorflow/compiler/jit/shape_inference.cc
@@ -17,7 +17,10 @@ limitations under the License.
#include "tensorflow/compiler/jit/shape_inference_helpers.h" #include "tensorflow/core/common_runtime/shape_refiner.h" +#include "tensorflow/core/framework/node_def_util.h" #include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/framework/tensor.pb.h" +#include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/graph/algorithm.h" #include "tensorflow/core/util/dump_graph.h" @@ -39,7 +42,7 @@ Status ShapeHandleToTensorShape(shape_inference::InferenceContext* context, return PartialTensorShape::MakePartialShape(dims.data(), dims.size(), shape); } -Status PropagateShapes(const Graph& graph, +Status PropagateShapes(Graph* graph, const std::map& arg_shapes, const std::vector& back_edges, ShapeRefiner* shape_refiner) { @@ -54,7 +57,7 @@ Status PropagateShapes(const Graph& graph, // shapes. // TODO(phawkins): handle cyclic graphs. std::vector order; - GetReversePostOrder(graph, &order); + GetReversePostOrder(*graph, &order); for (Node* n : order) { // Ignore the status returned by the shape_refiner. We want the best effort @@ -99,6 +102,67 @@ Status PropagateShapes(const Graph& graph, } } + // Sometimes we have VariableShape nodes in while loop (after Enter nodes). + // They won't be constant-folded because TensorFlow constant folding does + // not handle Enter nodes (and thus does not handle any nodes after Enter + // nodes). We try to replace such VariableShape nodes with Const nodes here. + if (n->type_string() == "VariableShape") { + shape_inference::InferenceContext* context = shape_refiner->GetContext(n); + auto handle_shapes_and_types = context->input_handle_shapes_and_types(0); + if (handle_shapes_and_types && !handle_shapes_and_types->empty()) { + shape_inference::ShapeHandle handle = + handle_shapes_and_types->at(0).shape; + TensorShapeProto shape_proto; + context->ShapeHandleToProto(handle, &shape_proto); + if (!shape_proto.unknown_rank()) { + NodeDef const_def; + const_def.set_op("Const"); + Node* var_node; + TF_RETURN_IF_ERROR(n->input_node(0, &var_node)); + const_def.set_name( + graph->NewName(absl::StrCat("var_shape_", var_node->name()))); + DataType dtype = n->output_type(0); + AddNodeAttr("dtype", dtype, &const_def); + TensorProto value; + value.set_dtype(dtype); + value.mutable_tensor_shape()->add_dim()->set_size( + shape_proto.dim_size()); + for (const auto& dim : shape_proto.dim()) { + if (dtype == DT_INT32) { + value.add_int_val(dim.size()); + } else { + value.add_int64_val(dim.size()); + } + } + AddNodeAttr("value", value, &const_def); + for (auto const& attr : n->attrs()) { + if (*attr.first.begin() == '_') { + AddNodeAttr(attr.first, attr.second, &const_def); + } + } + + Status s; + Node* const_node = graph->AddNode(const_def, &s); + TF_RETURN_IF_ERROR(s); + + graph->AddControlEdge(var_node, const_node); + std::vector out_edges(n->out_edges().begin(), + n->out_edges().end()); + for (const Edge* e : out_edges) { + if (e->IsControlEdge()) { + graph->AddControlEdge(const_node, e->dst()); + graph->RemoveEdge(e); + } else { + Node* dst = e->dst(); + int dst_input = e->dst_input(); + graph->RemoveEdge(e); + graph->AddEdge(const_node, 0, dst, dst_input); + } + } + } + } + } + // Merge node causes a loop so we remove NextIteration->Merge edge before // performing shape inference. But removing those edges also prevents us // from inferring output shape for Merge node (we need shapes for all its @@ -196,7 +260,7 @@ Status InferShapes(Graph* graph, const std::map& arg_shapes, // the shape inference is complete. 
BackEdgeHelper back_edge; TF_RETURN_IF_ERROR(back_edge.Remove(graph)); - TF_RETURN_IF_ERROR(PropagateShapes(*graph, arg_shapes, + TF_RETURN_IF_ERROR(PropagateShapes(graph, arg_shapes, back_edge.RemovedEdges(), &shape_refiner)); TF_RETURN_IF_ERROR(back_edge.Replace()); diff --git a/tensorflow/compiler/jit/xla_device_ops.h b/tensorflow/compiler/jit/xla_device_ops.h index 99e95314f64..34ff0c55615 100644 --- a/tensorflow/compiler/jit/xla_device_ops.h +++ b/tensorflow/compiler/jit/xla_device_ops.h @@ -191,7 +191,7 @@ class XlaAssignVariableOp : public OpKernel { REGISTER_KERNEL_BUILDER(Name("IteratorGetNextAsOptional").Device(DEVICE), \ data::IteratorGetNextAsOptionalOp); \ REGISTER_KERNEL_BUILDER(Name("IteratorGetNextSync").Device(DEVICE), \ - data::IteratorGetNextSyncOp); \ + data::IteratorGetNextOp); \ REGISTER_KERNEL_BUILDER(Name("IteratorToStringHandle") \ .Device(DEVICE) \ .HostMemory("string_handle"), \ diff --git a/tensorflow/compiler/jit/xla_kernel_creator.cc b/tensorflow/compiler/jit/xla_kernel_creator.cc index e3706a09278..23bd7425dbd 100644 --- a/tensorflow/compiler/jit/xla_kernel_creator.cc +++ b/tensorflow/compiler/jit/xla_kernel_creator.cc @@ -21,7 +21,7 @@ namespace tensorflow { bool XlaKernelCreator::CanCreateKernel(const FunctionLibraryRuntime& flr, const NodeDef& node_def) const { - return CanCreateXlaKernel(flr, node_def); + return CanCreateXlaKernel(node_def); } Status XlaKernelCreator::CreateKernel(FunctionLibraryRuntime* flr, diff --git a/tensorflow/compiler/jit/xla_kernel_creator_test.cc b/tensorflow/compiler/jit/xla_kernel_creator_test.cc index 28606abf2b2..7ec37332906 100644 --- a/tensorflow/compiler/jit/xla_kernel_creator_test.cc +++ b/tensorflow/compiler/jit/xla_kernel_creator_test.cc @@ -95,15 +95,17 @@ AttrValue BoolAttr(bool b) { TEST_F(XlaKernelCreatorTest, OneFloatOneResourceArgument) { FunctionDef fdef = XTimesY(); - (*fdef.mutable_attr())["_XlaCompile"] = BoolAttr(true); + (*fdef.mutable_attr())["_XlaMustCompile"] = BoolAttr(true); Init({fdef}); XlaKernelCreator xla_kernel_creator; - - Status status = xla_kernel_creator.CreateKernel( - flr_, ToNodeDef(R"pb( + NodeDef callsite = + ToNodeDef(R"pb( name: 'XTimesY' op: 'XTimesY' input: 'a' input: 'b' - )pb"), - &kernel_); + )pb"); + (*callsite.mutable_attr())["_XlaMustCompile"] = BoolAttr(true); + + // Note: need to set attribute on the created node. + Status status = xla_kernel_creator.CreateKernel(flr_, callsite, &kernel_); ASSERT_TRUE(status.ok()) << status.ToString(); EXPECT_EQ("XTimesY", kernel_->name()); @@ -137,7 +139,7 @@ TEST_F(XlaKernelCreatorTest, FailsIfXlaCompileAttrNotSet) { TEST_F(XlaKernelCreatorTest, FailsIfXlaCompileAttrIsSetToFalse) { FunctionDef fdef = XTimesY(); - (*fdef.mutable_attr())["_XlaCompile"] = BoolAttr(false); + (*fdef.mutable_attr())["_XlaMustCompile"] = BoolAttr(false); Init({fdef}); XlaKernelCreator xla_kernel_creator; diff --git a/tensorflow/compiler/jit/xla_kernel_creator_util.cc b/tensorflow/compiler/jit/xla_kernel_creator_util.cc index 6441dd3ed28..94727fdf35a 100644 --- a/tensorflow/compiler/jit/xla_kernel_creator_util.cc +++ b/tensorflow/compiler/jit/xla_kernel_creator_util.cc @@ -23,7 +23,9 @@ limitations under the License. 
#include "tensorflow/compiler/jit/mark_for_compilation_pass.h" #include "tensorflow/compiler/tf2xla/const_analysis.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" +#include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/node_def_util.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/util/ptr_util.h" @@ -68,40 +70,10 @@ class SinglePassSearch { }; } // namespace -bool CanCreateXlaKernel(const FunctionLibraryRuntime& flr, - const NodeDef& node_def) { - const FunctionDef* function_def = - flr.GetFunctionLibraryDefinition()->Find(node_def.name()); - if (function_def == nullptr) { - // The node def is not calling a function. Individual ops can be - // run directly using on-demand mode, no need to create XlaLaunch - // kernel for them. - return false; - } - - // If kXlaCompileAttr is set on the node_def, use its value. - const auto& it = node_def.attr().find(kXlaCompileAttr); - if (it != node_def.attr().end()) { - return it->second.b(); - } - - // kXlaCompileAttr is not set on node_def, check if it is set on - // FunctionDef. - bool xla_compile = false; - Status status = flr.GetFunctionLibraryDefinition()->GetAttr( - node_def, kXlaCompileAttr, &xla_compile); - if (!status.ok() || !xla_compile) { - if (VLOG_IS_ON(3)) { - if (!status.ok()) { - VLOG(3) << "No " << kXlaCompileAttr << " attr defined for " - << node_def.op() << ". status=" << status.ToString(); - } else { - VLOG(3) << node_def.op() << " is explicitly marked not to be compiled"; - } - } - return false; - } - return true; +bool CanCreateXlaKernel(const NodeDef& node_def) { + // If kXlaMustCompileAttr is set on the node_def, use its value. + const auto& it = node_def.attr().find(kXlaMustCompileAttr); + return it != node_def.attr().end() && it->second.b(); } // Given a FunctionLibraryRuntime and a NodeDef calling a function in the @@ -118,8 +90,11 @@ Status GetBodyAndConstantsAndResources(FunctionLibraryRuntime* flr, FunctionLibraryRuntime::Handle handle; // If node_def is not instantiable, e.g., the function does not exist, // simply bail out. + NameAttrList function; + TF_RETURN_IF_ERROR(NameAndAttrsFromFunctionCall(node_def, &function)); + TF_RETURN_IF_ERROR( - flr->Instantiate(node_def.op(), AttrSlice(&node_def.attr()), &handle)); + flr->Instantiate(function.name(), AttrSlice(&function.attr()), &handle)); *fbody = flr->GetFunctionBody(handle); CHECK(*fbody); // Can't be nullptr since we just instantiated it. const DataTypeVector& arg_types = (*fbody)->arg_types; @@ -149,7 +124,7 @@ Status GetBodyAndConstantsAndResources(FunctionLibraryRuntime* flr, Status CreateXlaKernel(FunctionLibraryRuntime* flr, const NodeDef& node_def, std::unique_ptr* kernel) { - if (!CanCreateXlaKernel(*flr, node_def)) { + if (!CanCreateXlaKernel(node_def)) { return errors::Internal("Invalid node: ", node_def.ShortDebugString()); } @@ -241,9 +216,7 @@ Status CreateXlaKernel(FunctionLibraryRuntime* flr, const NodeDef& node_def, // Create the kernel. 
NameAttrList function; - function.set_name(node_def.op()); - *(function.mutable_attr()) = node_def.attr(); - + TF_RETURN_IF_ERROR(NameAndAttrsFromFunctionCall(node_def, &function)); Device* dev = flr->device(); Status s; OpKernelConstruction construction( diff --git a/tensorflow/compiler/jit/xla_kernel_creator_util.h b/tensorflow/compiler/jit/xla_kernel_creator_util.h index 71398c334fc..5ec8df01f77 100644 --- a/tensorflow/compiler/jit/xla_kernel_creator_util.h +++ b/tensorflow/compiler/jit/xla_kernel_creator_util.h @@ -24,11 +24,9 @@ namespace tensorflow { class FunctionLibraryRuntime; class OpKernel; - // Given a NodeDef 'node_def' and the function library runtime 'flr', returns - // true if 'node_def' is a call to a compilable function defined in 'flr', - // with the kXlaCompileAttr set. -bool CanCreateXlaKernel(const FunctionLibraryRuntime& flr, - const NodeDef& node_def); +// Given a NodeDef `node_def` returns true iff `node_def` has kXlaCompileAttr +// set. +bool CanCreateXlaKernel(const NodeDef& node_def); // Given a supported NodeDef, returns a XlaLaunchOp that computes the node. Status CreateXlaKernel(FunctionLibraryRuntime* flr, const NodeDef& node_def, diff --git a/tensorflow/compiler/mlir/BUILD b/tensorflow/compiler/mlir/BUILD index b54d5867487..554288a0937 100644 --- a/tensorflow/compiler/mlir/BUILD +++ b/tensorflow/compiler/mlir/BUILD @@ -6,7 +6,7 @@ load("//tensorflow:tensorflow.bzl", "tf_cc_binary") package( default_visibility = [ "//tensorflow/compiler/tf2xla:__subpackages__", - "@local_config_mlir//:friends", + "@llvm-project//mlir:friends", ], licenses = ["notice"], # Apache 2.0 ) @@ -30,8 +30,8 @@ cc_library( hdrs = ["op_or_arg_name_mapper.h"], deps = [ "@com_google_absl//absl/strings", - "@llvm//:support", - "@local_config_mlir//:IR", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", ], ) @@ -43,11 +43,11 @@ cc_library( ":passes", "//tensorflow/core:lib", "//tensorflow/core/platform:logging", - "@llvm//:support", - "@local_config_mlir//:MlirOptLib", - "@local_config_mlir//:Pass", - "@local_config_mlir//:Support", - "@local_config_mlir//test:TestTransforms", + "@llvm-project//llvm:support", + "@llvm-project//mlir:MlirOptLib", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:Support", + "@llvm-project//mlir/test:TestTransforms", ], ) @@ -80,9 +80,9 @@ cc_library( "//tensorflow/compiler/mlir/xla:xla_legalize_tf", "//tensorflow/compiler/mlir/xla:xla_legalize_to_standard", "//tensorflow/compiler/mlir/xla:xla_lower", - "@local_config_mlir//:AffineDialectRegistration", - "@local_config_mlir//:QuantOps", - "@local_config_mlir//:QuantOpsDialectRegistration", + "@llvm-project//mlir:AffineDialectRegistration", + "@llvm-project//mlir:QuantOps", + "@llvm-project//mlir:QuantOpsDialectRegistration", ], ) @@ -92,7 +92,7 @@ cc_library( hdrs = ["init_mlir.h"], deps = [ "//tensorflow/core:lib", - "@llvm//:support", + "@llvm-project//llvm:support", ], ) @@ -122,11 +122,11 @@ tf_cc_binary( "//tensorflow/core:tensorflow", "//tensorflow/stream_executor/lib", "@com_google_absl//absl/strings", - "@llvm//:support", - "@local_config_mlir//:IR", - "@local_config_mlir//:Support", - "@local_config_mlir//:TranslateClParser", - "@local_config_mlir//:Translation", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Support", + "@llvm-project//mlir:TranslateClParser", + "@llvm-project//mlir:Translation", ], ) diff --git a/tensorflow/compiler/mlir/README.md b/tensorflow/compiler/mlir/README.md index f86b329b39f..cbb0b08503a 100644 --- 
a/tensorflow/compiler/mlir/README.md +++ b/tensorflow/compiler/mlir/README.md @@ -1,11 +1,11 @@ # MLIR dialects and utilities for TensorFlow, TensorFlow Lite and XLA. This module contains the MLIR -([Multi-Level Intermediate Representation](https://github.com/tensorflow/mlir)) +([Multi-Level Intermediate Representation](https://mlir.llvm.org)) dialects and utilities for 1. TensorFlow 2. XLA 3. TF Lite -See [MLIR repo](https://github.com/tensorflow/mlir) for complete documentation. \ No newline at end of file +See [MLIR's website](https://mlir.llvm.org) for complete documentation. diff --git a/tensorflow/compiler/mlir/glob_lit_test.bzl b/tensorflow/compiler/mlir/glob_lit_test.bzl index f82f719f2ce..fda2f819b98 100644 --- a/tensorflow/compiler/mlir/glob_lit_test.bzl +++ b/tensorflow/compiler/mlir/glob_lit_test.bzl @@ -10,7 +10,7 @@ load("@bazel_skylib//lib:paths.bzl", "paths") # Default values used by the test runner. _default_test_file_exts = ["mlir", ".pbtxt", ".td"] -_default_driver = "@local_config_mlir//:run_lit.sh" +_default_driver = "@llvm-project//mlir:run_lit.sh" _default_size = "small" _default_tags = ["no_rocm"] @@ -50,16 +50,16 @@ def _run_lit_test(name, data, size, tags, driver, features): native.py_test( name = name, - srcs = ["@llvm//:lit"], + srcs = ["@llvm-project//llvm:lit"], tags = tags, args = [ "tensorflow/compiler/mlir/" + paths.basename(data[-1]) + " --config-prefix=runlit -v", ] + features, data = data + [ "//tensorflow/compiler/mlir:litfiles", - "@llvm//:FileCheck", - "@llvm//:count", - "@llvm//:not", + "@llvm-project//llvm:FileCheck", + "@llvm-project//llvm:count", + "@llvm-project//llvm:not", ], size = size, main = "lit.py", diff --git a/tensorflow/compiler/mlir/lite/BUILD b/tensorflow/compiler/mlir/lite/BUILD index 6f08ca3b5e8..700b2e6bb16 100644 --- a/tensorflow/compiler/mlir/lite/BUILD +++ b/tensorflow/compiler/mlir/lite/BUILD @@ -1,6 +1,6 @@ load("//tensorflow:tensorflow.bzl", "tf_cc_binary", "tf_cc_test", "tf_native_cc_binary") load( - "@local_config_mlir//:tblgen.bzl", + "//third_party/mlir:tblgen.bzl", "gentbl", ) @@ -8,13 +8,14 @@ package( default_visibility = [ # TODO(jpienaar): Make the visibility more restrictive. 
":friends", + "//tensorflow/lite/experimental/tf_runtime:__subpackages__", ], licenses = ["notice"], # Apache 2.0 ) package_group( name = "friends", - includes = ["@local_config_mlir//:subpackages"], + includes = ["//third_party/mlir:subpackages"], packages = [ "//learning/brain/experimental/mlir/...", "//learning/brain/google/xla/...", @@ -27,7 +28,7 @@ filegroup( srcs = [ "ir/tfl_ops.td", "//tensorflow/compiler/mlir/lite/quantization:quantization_td_files", - "@local_config_mlir//:OpBaseTdFiles", + "@llvm-project//mlir:OpBaseTdFiles", ], ) @@ -47,7 +48,7 @@ gentbl( "g3doc/tfl_ops.md", ), ], - tblgen = "@local_config_mlir//:mlir-tblgen", + tblgen = "@llvm-project//mlir:mlir-tblgen", td_file = "ir/tfl_ops.td", td_srcs = [ ":tensorflow_lite_ops_td_files", @@ -62,11 +63,11 @@ gentbl( "transforms/generated_prepare_tf.inc", ), ], - tblgen = "@local_config_mlir//:mlir-tblgen", + tblgen = "@llvm-project//mlir:mlir-tblgen", td_file = "transforms/prepare_patterns.td", td_srcs = [ ":tensorflow_lite_ops_td_files", - "@local_config_mlir//:StdOpsTdFiles", + "@llvm-project//mlir:StdOpsTdFiles", "//tensorflow/compiler/mlir/tensorflow:tensorflow_ops_td_files", "//tensorflow/compiler/mlir/tensorflow:tensorflow_optimize_td_files", ], @@ -80,11 +81,11 @@ gentbl( "transforms/generated_lower_static_tensor_list.inc", ), ], - tblgen = "@local_config_mlir//:mlir-tblgen", + tblgen = "@llvm-project//mlir:mlir-tblgen", td_file = "transforms/tensorlist_patterns.td", td_srcs = [ ":tensorflow_lite_ops_td_files", - "@local_config_mlir//:StdOpsTdFiles", + "@llvm-project//mlir:StdOpsTdFiles", "//tensorflow/compiler/mlir/tensorflow:tensorflow_ops_td_files", ], ) @@ -97,11 +98,11 @@ gentbl( "transforms/generated_legalize_tf.inc", ), ], - tblgen = "@local_config_mlir//:mlir-tblgen", + tblgen = "@llvm-project//mlir:mlir-tblgen", td_file = "transforms/legalize_patterns.td", td_srcs = [ ":tensorflow_lite_ops_td_files", - "@local_config_mlir//:StdOpsTdFiles", + "@llvm-project//mlir:StdOpsTdFiles", "//tensorflow/compiler/mlir/tensorflow:tensorflow_ops_td_files", ], ) @@ -114,11 +115,12 @@ gentbl( "transforms/generated_optimize.inc", ), ], - tblgen = "@local_config_mlir//:mlir-tblgen", + tblgen = "@llvm-project//mlir:mlir-tblgen", td_file = "transforms/optimize_patterns.td", td_srcs = [ ":tensorflow_lite_ops_td_files", - "@local_config_mlir//:StdOpsTdFiles", + "@llvm-project//mlir:StdOpsTdFiles", + "//tensorflow/compiler/mlir/tensorflow:tensorflow_ops_td_files", ], ) @@ -130,11 +132,11 @@ gentbl( "transforms/generated_quantize.inc", ), ], - tblgen = "@local_config_mlir//:mlir-tblgen", + tblgen = "@llvm-project//mlir:mlir-tblgen", td_file = "transforms/quantize_patterns.td", td_srcs = [ ":tensorflow_lite_ops_td_files", - "@local_config_mlir//:StdOpsTdFiles", + "@llvm-project//mlir:StdOpsTdFiles", ], ) @@ -146,11 +148,11 @@ gentbl( "transforms/generated_post_quantize.inc", ), ], - tblgen = "@local_config_mlir//:mlir-tblgen", + tblgen = "@llvm-project//mlir:mlir-tblgen", td_file = "transforms/post_quantize_patterns.td", td_srcs = [ ":tensorflow_lite_ops_td_files", - "@local_config_mlir//:StdOpsTdFiles", + "@llvm-project//mlir:StdOpsTdFiles", ], ) @@ -163,9 +165,9 @@ cc_library( "utils/validators.h", ], deps = [ - "@local_config_mlir//:Dialect", - "@local_config_mlir//:IR", - "@local_config_mlir//:StandardOps", + "@llvm-project//mlir:Dialect", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:StandardOps", ], ) @@ -183,21 +185,21 @@ cc_library( "transforms/passes.h", "utils/attribute_utils.h", 
"//tensorflow/compiler/mlir/lite/quantization:quantization_traits.h", - "@local_config_mlir//:include/mlir/Transforms/InliningUtils.h", + "@llvm-project//mlir:include/mlir/Transforms/InliningUtils.h", ], deps = [ ":tensorflow_lite_ops_inc_gen", ":validators", "//tensorflow/compiler/mlir/tensorflow", "//tensorflow/lite/schema:schema_fbs", - "@llvm//:support", - "@local_config_mlir//:Analysis", - "@local_config_mlir//:Dialect", - "@local_config_mlir//:IR", - "@local_config_mlir//:Pass", - "@local_config_mlir//:QuantOps", - "@local_config_mlir//:StandardOps", - "@local_config_mlir//:Support", + "@llvm-project//llvm:support", + "@llvm-project//mlir:Analysis", + "@llvm-project//mlir:Dialect", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:QuantOps", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Support", ], alwayslink = 1, ) @@ -214,10 +216,10 @@ cc_library( deps = [ ":tensorflow_lite", "//tensorflow/compiler/mlir/tensorflow", - "@llvm//:support", - "@local_config_mlir//:IR", - "@local_config_mlir//:StandardOps", - "@local_config_mlir//:Support", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Support", ], ) @@ -231,9 +233,9 @@ cc_library( ], deps = [ ":tensorflow_lite", - "@llvm//:support", - "@local_config_mlir//:IR", - "@local_config_mlir//:StandardOps", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:StandardOps", ], ) @@ -246,10 +248,10 @@ tf_cc_test( "//tensorflow/compiler/mlir/tensorflow", "//tensorflow/core:test", "//tensorflow/core:test_main", - "@llvm//:support", - "@local_config_mlir//:IR", - "@local_config_mlir//:StandardOps", - "@local_config_mlir//:Support", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Support", ], ) @@ -290,14 +292,14 @@ cc_library( "//tensorflow/core:protos_all_cc", "//tensorflow/core/platform:logging", "@com_google_absl//absl/memory", - "@llvm//:support", - "@local_config_mlir//:Analysis", - "@local_config_mlir//:IR", - "@local_config_mlir//:Pass", - "@local_config_mlir//:QuantOps", - "@local_config_mlir//:StandardOps", - "@local_config_mlir//:Support", - "@local_config_mlir//:Transforms", + "@llvm-project//llvm:support", + "@llvm-project//mlir:Analysis", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:QuantOps", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Support", + "@llvm-project//mlir:Transforms", ], alwayslink = 1, ) @@ -315,12 +317,12 @@ cc_library( ":tensorflow_lite", ":validators", "//tensorflow/compiler/mlir/tensorflow", - "@llvm//:support", - "@local_config_mlir//:Analysis", - "@local_config_mlir//:IR", - "@local_config_mlir//:Pass", - "@local_config_mlir//:StandardOps", - "@local_config_mlir//:Support", + "@llvm-project//llvm:support", + "@llvm-project//mlir:Analysis", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Support", ], alwayslink = 1, ) @@ -346,13 +348,13 @@ cc_library( "//tensorflow/compiler/mlir/lite/quantization:quantization_lib", "//tensorflow/core:protos_all_cc", "@com_google_absl//absl/memory", - "@llvm//:support", - "@local_config_mlir//:Analysis", - "@local_config_mlir//:IR", - "@local_config_mlir//:Pass", - "@local_config_mlir//:QuantOps", - "@local_config_mlir//:StandardOps", - "@local_config_mlir//:Support", + "@llvm-project//llvm:support", + "@llvm-project//mlir:Analysis", + 
"@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:QuantOps", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Support", ], alwayslink = 1, ) @@ -374,7 +376,7 @@ genrule( "utils/generated_op_quant_spec_getters.inc", ], cmd = ("$(location //tensorflow/compiler/mlir/lite/quantization:op_quant_spec_getters_gen) " + - "-I external/local_config_mlir/include " + + "-I external/llvm-project/mlir/include " + "-I external/org_tensorflow " + "$(location //tensorflow/compiler/mlir/lite:ir/tfl_ops.td) " + " -o $@"), tools = ["//tensorflow/compiler/mlir/lite/quantization:op_quant_spec_getters_gen"], @@ -388,7 +390,7 @@ cc_library( ], deps = [ ":tensorflow_lite", - "@local_config_mlir//:IR", + "@llvm-project//mlir:IR", ], alwayslink = 1, ) @@ -399,9 +401,9 @@ tf_native_cc_binary( "operator_converter_gen.cc", ], deps = [ - "@llvm//:support", - "@llvm//:tablegen", - "@local_config_mlir//:TableGen", + "@llvm-project//llvm:support", + "@llvm-project//llvm:tablegen", + "@llvm-project//mlir:TableGen", ], ) @@ -434,12 +436,17 @@ cc_library( deps = [ ":tensorflow_lite", "//tensorflow/compiler/mlir/tensorflow", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/core/platform:errors", + "//tensorflow/core/platform:status", + "//tensorflow/lite/kernels/internal:kernel_utils", "//tensorflow/lite/schema:schema_fbs", "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/strings", "@flatbuffers", - "@llvm//:support", - "@local_config_mlir//:IR", - "@local_config_mlir//:TransformUtils", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:TransformUtils", ], ) @@ -462,7 +469,7 @@ cc_library( ], deps = [ "//tensorflow/lite/core/api", - "@local_config_mlir//:IR", + "@llvm-project//mlir:IR", ], ) @@ -507,14 +514,14 @@ cc_library( "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/strings", "@flatbuffers", - "@llvm//:support", - "@local_config_mlir//:IR", - "@local_config_mlir//:QuantOps", - "@local_config_mlir//:QuantOpsDialectRegistration", - "@local_config_mlir//:StandardDialectRegistration", - "@local_config_mlir//:StandardOps", - "@local_config_mlir//:Support", - "@local_config_mlir//:Translation", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:QuantOps", + "@llvm-project//mlir:QuantOpsDialectRegistration", + "@llvm-project//mlir:StandardDialectRegistration", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Support", + "@llvm-project//mlir:Translation", ], alwayslink = 1, ) @@ -523,7 +530,7 @@ tf_cc_binary( name = "flatbuffer_translate", deps = [ ":flatbuffer_translate_lib", - "@local_config_mlir//:MlirTranslateMain", + "@llvm-project//mlir:MlirTranslateMain", ], ) @@ -536,7 +543,7 @@ cc_library( "tf_tfl_translate_cl.h", ], deps = [ - "@llvm//:support", + "@llvm-project//llvm:support", ], alwayslink = 1, ) @@ -548,7 +555,7 @@ cc_library( ], deps = [ "//tensorflow/compiler/mlir/lite/quantization:quantization_config", - "@llvm//:support", + "@llvm-project//llvm:support", ], ) @@ -576,9 +583,9 @@ tf_cc_binary( "//tensorflow/lite/schema:schema_fbs", "//tensorflow/stream_executor/lib", "@com_google_absl//absl/strings", - "@llvm//:support", - "@local_config_mlir//:IR", - "@local_config_mlir//:Support", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Support", ], ) @@ -589,16 +596,15 @@ tf_cc_binary( ":flatbuffer_translate_lib", "//tensorflow/core:lib", "//tensorflow/core/platform:logging", - 
"//tensorflow/core/platform/default/build_config:base", "//tensorflow/lite:framework", "//tensorflow/lite/delegates/flex:delegate", "//tensorflow/lite/kernels:builtin_ops", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/strings", - "@llvm//:support", - "@local_config_mlir//:IR", - "@local_config_mlir//:Parser", - "@local_config_mlir//:Support", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Parser", + "@llvm-project//mlir:Support", ], ) @@ -621,12 +627,12 @@ cc_library( "//tensorflow/compiler/mlir/tensorflow:tf_dialect_passes", "//tensorflow/compiler/mlir/tensorflow:tf_graph_optimization_pass", "//tensorflow/compiler/mlir/tensorflow:translate_lib", - "@local_config_mlir//:Analysis", - "@local_config_mlir//:IR", - "@local_config_mlir//:Pass", - "@local_config_mlir//:QuantOps", - "@local_config_mlir//:QuantOpsDialectRegistration", - "@local_config_mlir//:Transforms", + "@llvm-project//mlir:Analysis", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:QuantOps", + "@llvm-project//mlir:QuantOpsDialectRegistration", + "@llvm-project//mlir:Transforms", ], ) @@ -653,15 +659,15 @@ cc_library( "//tensorflow/core:protos_all_cc", "//tensorflow/lite/tools/optimize:quantize_weights", "//tensorflow/stream_executor/lib", - "@llvm//:support", - "@local_config_mlir//:Analysis", - "@local_config_mlir//:IR", - "@local_config_mlir//:Parser", - "@local_config_mlir//:Pass", - "@local_config_mlir//:QuantOps", - "@local_config_mlir//:QuantOpsDialectRegistration", - "@local_config_mlir//:Support", - "@local_config_mlir//:Transforms", + "@llvm-project//llvm:support", + "@llvm-project//mlir:Analysis", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Parser", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:QuantOps", + "@llvm-project//mlir:QuantOpsDialectRegistration", + "@llvm-project//mlir:Support", + "@llvm-project//mlir:Transforms", ], ) diff --git a/tensorflow/compiler/mlir/lite/emit_error_reporter.h b/tensorflow/compiler/mlir/lite/emit_error_reporter.h index 40e89c5dec8..76cc1f612bb 100644 --- a/tensorflow/compiler/mlir/lite/emit_error_reporter.h +++ b/tensorflow/compiler/mlir/lite/emit_error_reporter.h @@ -18,7 +18,7 @@ limitations under the License. #include -#include "mlir/IR/Module.h" // TF:local_config_mlir +#include "mlir/IR/Module.h" // TF:llvm-project #include "tensorflow/lite/core/api/error_reporter.h" namespace tflite { diff --git a/tensorflow/compiler/mlir/lite/flatbuffer_import.cc b/tensorflow/compiler/mlir/lite/flatbuffer_import.cc index 11d97120d00..43974e02bba 100644 --- a/tensorflow/compiler/mlir/lite/flatbuffer_import.cc +++ b/tensorflow/compiler/mlir/lite/flatbuffer_import.cc @@ -17,6 +17,7 @@ limitations under the License. #include #include +#include #include #include #include @@ -43,24 +44,24 @@ limitations under the License. 
#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" -#include "mlir/Dialect/QuantOps/QuantOps.h" // TF:local_config_mlir -#include "mlir/Dialect/QuantOps/QuantTypes.h" // TF:local_config_mlir -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Diagnostics.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/OperationSupport.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir -#include "mlir/IR/Value.h" // TF:local_config_mlir -#include "mlir/Support/Functional.h" // TF:local_config_mlir -#include "mlir/Support/LLVM.h" // TF:local_config_mlir -#include "mlir/Translation.h" // TF:local_config_mlir +#include "mlir/Dialect/QuantOps/QuantOps.h" // TF:llvm-project +#include "mlir/Dialect/QuantOps/QuantTypes.h" // TF:llvm-project +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Diagnostics.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/OperationSupport.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project +#include "mlir/IR/Value.h" // TF:llvm-project +#include "mlir/Support/Functional.h" // TF:llvm-project +#include "mlir/Support/LLVM.h" // TF:llvm-project +#include "mlir/Translation.h" // TF:llvm-project #include "tensorflow/compiler/mlir/lite/flatbuffer_operator.h" #include "tensorflow/compiler/mlir/lite/flatbuffer_translate.h" #include "tensorflow/compiler/mlir/lite/flatbuffer_translate_flags.h" @@ -103,12 +104,26 @@ using llvm::cl::opt; // Commandline flag to enable the control of flatbuffer import. bool use_external_constant; +// Commandline flag to enable graph pruning. +bool experimental_prune_unreachable_nodes_unconditionally; + // NOLINTNEXTLINE static opt use_external_constant_flag( "use-external-constant", llvm::cl::desc("Use external constant during flatbuffer import"), llvm::cl::location(use_external_constant), llvm::cl::init(false)); +// TODO(b/147111261): After the importer supports generic custom ops, we should +// change the flag to a more lightwise flag, e.g. +// "import_custom_ops_as_side_effect_free_ops", and let the MLIR DCE to prune +// the operations. 
+// NOLINTNEXTLINE
+static opt<bool, true> experimental_prune_unreachable_nodes_unconditionally_flg(
+    "experimental-prune-unreachable-nodes-unconditionally",
+    llvm::cl::desc("Prune nodes that are not ancestors of the output nodes."),
+    llvm::cl::location(experimental_prune_unreachable_nodes_unconditionally),
+    llvm::cl::init(false));
+
 namespace {
 bool IsScalar(const TensorT& tensor) {
   // TODO(b/138222071) We can't distinguish scalars and unranked tensors
@@ -212,12 +227,12 @@ StatusOr<mlir::TensorType> GetTensorType(const TensorT& tensor, Builder builder,
 // type, thus no stats op is required and nullptr is returned.
 // If the min max information is invalid, nullptr is returned.
 mlir::Operation* ConvertMinMaxToStatsOp(const TensorT& tensor, OpBuilder b,
-                                        Value* res) {
+                                        Value res) {
   // If the `tensor` has scale/zero_point, it must have been quantized, then
   // the min/max stats are just for reference, so ignore them.
   if (!tensor.quantization || IsQuantized(tensor)) return nullptr;
   // If the result isn't float (and hence not quantizable), the min/max is
   // ignored.
-  if (!res->getType()
+  if (!res.getType()
           .cast<mlir::ShapedType>()
           .getElementType()
           .isa<mlir::FloatType>()) {
@@ -255,10 +270,23 @@ mlir::Operation* ConvertMinMaxToStatsOp(const TensorT& tensor, OpBuilder b,
 }
 
 StatusOr<std::string> OpNameForOpCode(const tflite::OperatorCodeT opcode) {
-  // TODO(krzysd) Support custom ops
+  // TODO(b/143872630): Support custom ops
   if (opcode.builtin_code == tflite::BuiltinOperator_CUSTOM) {
-    return errors::Unimplemented("unsupported custom operation: ",
-                                 opcode.custom_code);
+    // Add some custom ops supported on GPU.
+    const absl::string_view custom_name = opcode.custom_code;
+    if (custom_name == "MaxPoolingWithArgmax2D") {
+      return std::string("tfl.max_pooling_with_argmax_2d");
+    }
+    if (custom_name == "Convolution2DTransposeBias") {
+      return std::string("tfl.convolution_2d_transpose_bias");
+    }
+    if (custom_name == "MaxUnpooling2D") {
+      return std::string("tfl.max_unpooling_2d");
+    }
+    // Use an unsupported op name instead of throwing an error here in case the
+    // op is pruned during the import.
+    return std::string(
+        llvm::Twine("tfl.UNSUPPORTED_custom_", opcode.custom_code).str());
   }
   if (opcode.builtin_code == tflite::BuiltinOperator_IF) {
     return std::string("tf.If");
@@ -495,14 +523,21 @@ bool IsBasicLSTMOp(tflite::BuiltinOptionsUnion op_union) {
   }
 }
 
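For reference, the `use-external-constant` and `experimental-prune-unreachable-nodes-unconditionally` flags above follow LLVM's external-storage CommandLine pattern: the parsed value lands in a plain global bool that the rest of the importer reads. A minimal standalone sketch of that pattern, with illustrative flag and variable names:

#include "llvm/Support/CommandLine.h"

// External storage: the importer reads this bool, not the cl::opt object.
static bool example_prune_flag;

// With llvm::cl::location, the second template argument of cl::opt must be
// `true` (ExternalStorage); parsing writes into `example_prune_flag`.
static llvm::cl::opt<bool, true> example_prune_flag_opt(
    "example-prune-flag",
    llvm::cl::desc("Illustrative boolean flag bound to external storage."),
    llvm::cl::location(example_prune_flag), llvm::cl::init(false));

int main(int argc, char** argv) {
  llvm::cl::ParseCommandLineOptions(argc, argv);
  return example_prune_flag ? 0 : 1;
}
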
+// Returns true if this is a custom op.
+bool IsCustomOp(const std::string& op_name) {
+  return op_name == "tfl.max_pooling_with_argmax_2d" ||
+         op_name == "tfl.max_unpooling_2d" ||
+         op_name == "tfl.convolution_2d_transpose_bias";
+}
+
 // TODO(krzysd) Handle function calls
 StatusOr<Operation*> ConvertOp(
-    const tflite::OperatorT& op, const std::vector<Value*>& vals_map,
-    Value* optional_arg_marker, const std::vector<std::string>& op_names,
+    const tflite::OperatorT& op, const std::vector<Value>& vals_map,
+    Value optional_arg_marker, const std::vector<std::string>& op_names,
     const std::vector<std::string>& func_names,
     const std::vector<std::unique_ptr<tflite::TensorT>>& tensors, Location loc,
     OpBuilder builder) {
-  llvm::SmallVector<Value*, 4> operands;
+  llvm::SmallVector<Value, 4> operands;
   llvm::SmallVector<mlir::Type, 2> outputTypes;
 
   if (op.outputs.empty()) {
@@ -557,7 +592,15 @@ StatusOr<Operation*> ConvertOp(
   }
 
   llvm::SmallVector<mlir::NamedAttribute, 2> attrs;
-  mlir::BuiltinOptionsToAttributes(op.builtin_options, builder, attrs);
+  if (IsCustomOp(op_name)) {
+    auto status = mlir::CustomOptionsToAttributes(op_name, op.custom_options,
+                                                  builder, loc, &attrs);
+    if (!status.ok()) {
+      return emitError(loc, status.ToString()), status;
+    }
+  } else {
+    mlir::BuiltinOptionsToAttributes(op.builtin_options, builder, attrs);
+  }
   op_state.addAttributes(attrs);
 
   // Handle the conversion from subgraph index to functions for If and While
@@ -619,6 +662,49 @@ mlir::NamedAttribute BuildTFEntryFunctionAttribute(
       name, builder->getStringAttr(llvm::join(tensor_names, ",")));
 }
 
+// Given a list of output indices, traverses the subgraph and returns the set
+// of ops that are ancestors of the output tensors.
+StatusOr<absl::flat_hash_set<const tflite::OperatorT*>> PruneSubgraph(
+    const tflite::SubGraphT& subgraph, ArrayRef<int32_t> output_indices) {
+  // Create a map from tensor index to defining op.
+  absl::flat_hash_map<int32_t, const tflite::OperatorT*> defining_op;
+  for (const auto& op : subgraph.operators) {
+    for (int32_t output : op->outputs) {
+      defining_op[output] = op.get();
+    }
+  }
+
+  std::vector<const tflite::OperatorT*> queue;
+  for (int32_t output : output_indices) {
+    if (auto& op = defining_op[output]) {
+      queue.push_back(op);
+    } else {
+      return errors::InvalidArgument("Output tensor doesn't have defining op");
+    }
+  }
+
+  // Traverse the graph towards inputs.
+  absl::flat_hash_set<const tflite::OperatorT*> visited;
+  while (!queue.empty()) {
+    const tflite::OperatorT* op = queue.back();
+    queue.pop_back();
+    if (!visited.insert(op).second) {
+      // The node has already been visited.
+      continue;
+    }
+
+    for (int32_t input : op->inputs) {
+      // Input tensor may not have a defining op in case it is a subgraph input
+      // or a constant tensor.
+      if (auto& op = defining_op[input]) {
+        queue.push_back(op);
+      }
+    }
+  }
+
+  return visited;
+}
+
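PruneSubgraph above is a standard reverse reachability walk: seed a worklist with the ops defining the subgraph outputs, then repeatedly pop an op and enqueue the defining ops of its inputs. The same idea, sketched independently of the flatbuffer types (the `Node` struct and function name here are hypothetical):

#include <unordered_map>
#include <unordered_set>
#include <vector>

struct Node {
  std::vector<int> inputs;   // tensor indices this node consumes
  std::vector<int> outputs;  // tensor indices this node produces
};

// Returns the nodes reachable by walking backwards from the given outputs.
std::unordered_set<const Node*> AncestorsOfOutputs(
    const std::vector<Node>& nodes, const std::vector<int>& output_tensors) {
  // Map each tensor index to its defining node (graph inputs and constant
  // tensors have none).
  std::unordered_map<int, const Node*> defining_node;
  for (const Node& node : nodes)
    for (int out : node.outputs) defining_node[out] = &node;

  std::vector<const Node*> queue;
  for (int out : output_tensors)
    if (const Node* def = defining_node[out]) queue.push_back(def);

  std::unordered_set<const Node*> visited;
  while (!queue.empty()) {
    const Node* node = queue.back();
    queue.pop_back();
    if (!visited.insert(node).second) continue;  // already visited
    for (int in : node->inputs)
      if (const Node* def = defining_node[in]) queue.push_back(def);
  }
  return visited;
}

Any op not in the returned set is dead with respect to the requested outputs, which is why the import loop below skips ops missing from pruned_subgraph_ops.
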
 // Build a FuncOp from a tflite SubGraph
 // The op_names are a mapping from indexes into the TFLite operators array to
 // the operator name MLIR expects (tfl.foo_op). The buffers are directly taken
@@ -635,7 +721,8 @@ StatusOr<FuncOp> ConvertSubgraph(
     const std::vector<std::unique_ptr<tflite::BufferT>>& buffers,
     Location base_loc, Builder builder,
     const std::vector<std::string>& ordered_output_arrays, bool is_entry_point,
-    bool use_external_constant) {
+    bool use_external_constant,
+    bool experimental_prune_unreachable_nodes_unconditionally) {
   llvm::SmallVector<mlir::Type, 2> ret_types;
   llvm::SmallVector<mlir::Type, 4> input_types;
 
@@ -692,19 +779,19 @@ StatusOr<FuncOp> ConvertSubgraph(
   auto& body = func.getBody();
   OpBuilder op_builder{body};
 
-  std::vector<Value*> vals_map(subgraph.tensors.size(), nullptr);
-  Value* maybe_optional_arg_marker = nullptr;
+  std::vector<Value> vals_map(subgraph.tensors.size(), nullptr);
+  Value maybe_optional_arg_marker = nullptr;
 
   // Get or construct MLIR values for each input
   for (int i = 0, e = subgraph.inputs.size(); i < e; i++) {
     auto input_tensor = subgraph.inputs[i];
     const auto& tensor = *subgraph.tensors.at(input_tensor);
     auto loc = TensorLoc(tensor, builder, base_loc);
-    if (nullptr != vals_map[input_tensor]) {
+    if (vals_map[input_tensor]) {
       auto err = errors::FailedPrecondition("duplicate input arguments");
       return emitError(loc, err.ToString()), err;
     }
-    Value* input_value = func.getArgument(i);
+    Value input_value = func.getArgument(i);
 
     // If the `tensor` has min/max and doesn't have scale/zero_point
     // information, a stats op is created to use the input_value, then the
@@ -731,8 +818,19 @@ StatusOr<FuncOp> ConvertSubgraph(
     func.setAttr("tf.entry_function", builder.getDictionaryAttr(attributes));
   }
 
+  absl::flat_hash_set<const tflite::OperatorT*> pruned_subgraph_ops;
+  if (experimental_prune_unreachable_nodes_unconditionally) {
+    TF_ASSIGN_OR_RETURN(pruned_subgraph_ops,
+                        PruneSubgraph(subgraph, func_outputs));
+  }
+
   // Construct MLIR operators from TFLite operators
   for (auto& op : subgraph.operators) {
+    if (experimental_prune_unreachable_nodes_unconditionally &&
+        !pruned_subgraph_ops.contains(op.get())) {
+      continue;
+    }
+
     for (auto input_num : op->inputs) {
       // The operators in a graph are topologically sorted
       // and so if no previous operation has produced a tensor
@@ -745,7 +843,7 @@ StatusOr<FuncOp> ConvertSubgraph(
                              builder.getUnitAttr())
                          .getResult();
       }
-    } else if (nullptr == vals_map.at(input_num)) {
+    } else if (!vals_map.at(input_num)) {
       auto& const_tensor = *subgraph.tensors[input_num];
       auto const_loc = TensorLoc(const_tensor, builder, base_loc);
       auto op_or_err =
@@ -768,7 +866,7 @@ StatusOr<FuncOp> ConvertSubgraph(
            ?
base_loc : TensorLoc(*subgraph.tensors[op->outputs[0]], builder, base_loc); // If there's an optional argument, maybe_optional_arg_marker has been set - // to a valid Value* + // to a valid Value TF_ASSIGN_OR_RETURN( auto* mlir_op, ConvertOp(*op, vals_map, maybe_optional_arg_marker, op_names, @@ -791,9 +889,9 @@ StatusOr ConvertSubgraph( } // Construct return values - llvm::SmallVector return_operands; + llvm::SmallVector return_operands; for (auto index : func_outputs) { - if (nullptr == vals_map.at(index)) { + if (!vals_map.at(index)) { auto& const_tensor = *subgraph.tensors[index]; auto const_loc = TensorLoc(const_tensor, builder, base_loc); auto op_or_err = @@ -837,7 +935,8 @@ std::string SubgraphName(unsigned index, const tflite::SubGraphT& subgraph) { OwningModuleRef tflite::FlatBufferToMlir( absl::string_view buffer, MLIRContext* context, Location base_loc, const std::vector& ordered_output_arrays, - bool use_external_constant) { + bool use_external_constant, + bool experimental_prune_unreachable_nodes_unconditionally) { auto model_ptr = FlatBufferModel::VerifyAndBuildFromBuffer(buffer.data(), buffer.length()); if (nullptr == model_ptr) { @@ -892,7 +991,8 @@ OwningModuleRef tflite::FlatBufferToMlir( // TODO(b/131175224,b/132239787) Support multiple entry points builder, ordered_output_arrays, /*is_entry_point=*/e.index() == 0, - /*use_external_constant=*/use_external_constant); + /*use_external_constant=*/use_external_constant, + experimental_prune_unreachable_nodes_unconditionally); if (!func_or_error.ok()) { return emitError(base_loc, "could not translate function ") << subgraph->name, @@ -905,9 +1005,10 @@ OwningModuleRef tflite::FlatBufferToMlir( return OwningModuleRef(module); } -static OwningModuleRef FlatBufferFileToMlirTrans(llvm::SourceMgr* source_mgr, - MLIRContext* context, - bool use_external_constant) { +static OwningModuleRef FlatBufferFileToMlirTrans( + llvm::SourceMgr* source_mgr, MLIRContext* context, + bool use_external_constant, + bool experimental_prune_unreachable_nodes_unconditionally) { const llvm::MemoryBuffer* input = source_mgr->getMemoryBuffer(source_mgr->getMainFileID()); std::string error; @@ -924,12 +1025,14 @@ static OwningModuleRef FlatBufferFileToMlirTrans(llvm::SourceMgr* source_mgr, return tflite::FlatBufferToMlir( absl::string_view(input->getBufferStart(), input->getBufferSize()), - context, loc, outputs, use_external_constant); + context, loc, outputs, use_external_constant, + experimental_prune_unreachable_nodes_unconditionally); } static mlir::TranslateToMLIRRegistration FlatBufferFileToMlirTransReg( "tflite-flatbuffer-to-mlir", [](llvm::SourceMgr& source_mgr, MLIRContext* context) { - return FlatBufferFileToMlirTrans(&source_mgr, context, - use_external_constant); + return FlatBufferFileToMlirTrans( + &source_mgr, context, use_external_constant, + experimental_prune_unreachable_nodes_unconditionally); }); diff --git a/tensorflow/compiler/mlir/lite/flatbuffer_import.h b/tensorflow/compiler/mlir/lite/flatbuffer_import.h index 66b31c54c80..e3210c6d03f 100644 --- a/tensorflow/compiler/mlir/lite/flatbuffer_import.h +++ b/tensorflow/compiler/mlir/lite/flatbuffer_import.h @@ -17,9 +17,9 @@ limitations under the License. 
 #define TENSORFLOW_COMPILER_MLIR_LITE_FLATBUFFER_IMPORT_H_
 
 #include "absl/strings/string_view.h"
-#include "mlir/IR/Location.h"  // TF:local_config_mlir
-#include "mlir/IR/MLIRContext.h"  // TF:local_config_mlir
-#include "mlir/IR/Module.h"  // TF:local_config_mlir
+#include "mlir/IR/Location.h"  // TF:llvm-project
+#include "mlir/IR/MLIRContext.h"  // TF:llvm-project
+#include "mlir/IR/Module.h"  // TF:llvm-project
 
 namespace tflite {
 // Converts a TFLite flatbuffer stored in `buffer` to an MLIR module
@@ -31,11 +31,14 @@ namespace tflite {
 // on failure, and more specific errors will be emitted via the context.
 // If `use_external_constant` is true, it will create `tfl.external_const`
 // instead of `tfl.const`.
+// If `experimental_prune_unreachable_nodes_unconditionally` is true, nodes
+// that are not ancestors of the output nodes will be pruned.
 mlir::OwningModuleRef FlatBufferToMlir(
     absl::string_view buffer, mlir::MLIRContext* context,
     mlir::Location base_loc,
     const std::vector<std::string>& ordered_output_arrays,
-    bool use_external_constant = false);
+    bool use_external_constant = false,
+    bool experimental_prune_unreachable_nodes_unconditionally = false);
 }  // namespace tflite
 
 #endif  // TENSORFLOW_COMPILER_MLIR_LITE_FLATBUFFER_IMPORT_H_
diff --git a/tensorflow/compiler/mlir/lite/flatbuffer_operator.cc b/tensorflow/compiler/mlir/lite/flatbuffer_operator.cc
index 851292b10fa..d9680a51ae0 100644
--- a/tensorflow/compiler/mlir/lite/flatbuffer_operator.cc
+++ b/tensorflow/compiler/mlir/lite/flatbuffer_operator.cc
@@ -17,15 +17,45 @@ limitations under the License.
 
 #include
 
+#include "absl/strings/str_cat.h"
+#include "flatbuffers/flexbuffers.h"  // TF:flatbuffers
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringSwitch.h"
-#include "mlir/IR/Attributes.h"  // TF:local_config_mlir
-#include "mlir/IR/Builders.h"  // TF:local_config_mlir
-#include "mlir/IR/StandardTypes.h"  // TF:local_config_mlir
+#include "mlir/IR/Attributes.h"  // TF:llvm-project
+#include "mlir/IR/Builders.h"  // TF:llvm-project
+#include "mlir/IR/StandardTypes.h"  // TF:llvm-project
 #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h"
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h"
+#include "tensorflow/compiler/xla/statusor.h"
+#include "tensorflow/core/platform/errors.h"
+#include "tensorflow/lite/kernels/internal/kernel_utils.h"
 #include "tensorflow/lite/schema/schema_generated.h"
 
+namespace {
+
+using ::tensorflow::Status;
+using ::tensorflow::errors::InvalidArgument;
+using ::xla::StatusOr;
+
+StatusOr<mlir::StringAttr> GetPaddingAttr(TfLitePadding pad_params,
+                                          mlir::Builder builder,
+                                          mlir::Location loc) {
+  auto padding = tflite::Padding::Padding_VALID;
+  if (pad_params == TfLitePadding::kTfLitePaddingSame) {
+    padding = tflite::Padding_SAME;
+  } else if (pad_params == TfLitePadding::kTfLitePaddingValid) {
+    padding = tflite::Padding_VALID;
+  } else {
+    return InvalidArgument(
+        absl::StrCat("Invalid padding type: ", std::to_string(pad_params)));
+  }
+
+  const char* option_name = tflite::EnumNamePadding(padding);
+  return builder.getStringAttr(option_name);
+}
+
+}  // namespace
+
 // TODO(jpienaar): This is a placeholder. This should be done in a more
 // efficient way as part of the translation of the module.
 static tflite::ActivationFunctionType ConvertTFL_AFAttrForOptionWriter(
@@ -212,5 +242,44 @@ static mlir::Attribute BuildTFL_PaddingAttr(tflite::Padding value,
   return builder.getStringAttr(option_name);
 }
 
+Status mlir::CustomOptionsToAttributes(
+    const std::string& op_name, const std::vector<uint8_t>& custom_options,
+    mlir::Builder builder, mlir::Location loc,
+    llvm::SmallVectorImpl<mlir::NamedAttribute>* attributes) {
+  if (op_name == "tfl.max_pooling_with_argmax_2d" ||
+      op_name == "tfl.max_unpooling_2d") {
+    auto* pool_params =
+        reinterpret_cast<const TfLitePoolParams*>(custom_options.data());
+    TF_ASSIGN_OR_RETURN(auto padding_attribute,
+                        GetPaddingAttr(pool_params->padding, builder, loc));
+    attributes->emplace_back(
+        builder.getNamedAttr("padding", padding_attribute));
+    attributes->emplace_back(builder.getNamedAttr(
+        "stride_h", builder.getI32IntegerAttr(pool_params->stride_height)));
+    attributes->emplace_back(builder.getNamedAttr(
+        "stride_w", builder.getI32IntegerAttr(pool_params->stride_width)));
+    attributes->emplace_back(builder.getNamedAttr(
+        "filter_w", builder.getI32IntegerAttr(pool_params->filter_width)));
+    attributes->emplace_back(builder.getNamedAttr(
+        "filter_h", builder.getI32IntegerAttr(pool_params->filter_height)));
+    return Status::OK();
+
+  } else if (op_name == "tfl.convolution_2d_transpose_bias") {
+    auto* conv_params = reinterpret_cast<const TfLiteTransposeConvParams*>(
+        custom_options.data());
+    TF_ASSIGN_OR_RETURN(auto padding_attribute,
+                        GetPaddingAttr(conv_params->padding, builder, loc));
+    attributes->emplace_back(
+        builder.getNamedAttr("padding", padding_attribute));
+    attributes->emplace_back(builder.getNamedAttr(
+        "stride_h", builder.getI32IntegerAttr(conv_params->stride_height)));
+    attributes->emplace_back(builder.getNamedAttr(
+        "stride_w", builder.getI32IntegerAttr(conv_params->stride_width)));
+    return Status::OK();
+  }
+
+  return InvalidArgument(absl::StrCat("invalid custom op type: ", op_name));
+}
+
 // Pull in FlatBuffer writers for TFLite generated using TableGen
 #include "tensorflow/compiler/mlir/lite/operator_converters.inc"
diff --git a/tensorflow/compiler/mlir/lite/flatbuffer_operator.h b/tensorflow/compiler/mlir/lite/flatbuffer_operator.h
index 35293c1b812..fdc0fd81f8f 100644
--- a/tensorflow/compiler/mlir/lite/flatbuffer_operator.h
+++ b/tensorflow/compiler/mlir/lite/flatbuffer_operator.h
@@ -26,9 +26,10 @@ limitations under the License.
 #include "flatbuffers/flatbuffers.h"  // TF:flatbuffers
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
-#include "mlir/IR/Attributes.h"  // TF:local_config_mlir
-#include "mlir/IR/Builders.h"  // TF:local_config_mlir
-#include "mlir/IR/Operation.h"  // TF:local_config_mlir
+#include "mlir/IR/Attributes.h"  // TF:llvm-project
+#include "mlir/IR/Builders.h"  // TF:llvm-project
+#include "mlir/IR/Operation.h"  // TF:llvm-project
+#include "tensorflow/core/platform/status.h"
 #include "tensorflow/lite/schema/schema_generated.h"
 
 namespace mlir {
@@ -45,7 +46,7 @@ llvm::Optional<flatbuffers::Offset<tflite::Operator>> CreateFlatBufferOperator(
     const std::vector<int32_t> &operands, const std::vector<int32_t> &results,
     flatbuffers::FlatBufferBuilder *fbb);
 
-// Populate the array of mlir::NamedAttributes corresponding to the given
+// Populates the array of mlir::NamedAttributes corresponding to the given
 // tflite::FlatbufferOptionsUnion.
 // We use an out parameter per LLVM convention
 void BuiltinOptionsToAttributes(
@@ -53,6 +54,15 @@ void BuiltinOptionsToAttributes(
     // NOLINTNEXTLINE
     llvm::SmallVectorImpl<mlir::NamedAttribute> &attributes);
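CustomOptionsToAttributes above relies on the convention that, for these GPU custom ops, `custom_options` holds the raw bytes of the corresponding TfLite params struct, so a reinterpret_cast recovers the fields. A simplified sketch of that round trip, using a stand-in struct rather than the real TfLitePoolParams layout:

#include <cstdint>
#include <cstring>
#include <vector>

// Stand-in params struct; the real layouts come from the TFLite C headers.
struct ExamplePoolParams {
  int32_t stride_h;
  int32_t stride_w;
  int32_t filter_h;
  int32_t filter_w;
};

// Export side: dump the struct into the opaque custom_options buffer.
std::vector<uint8_t> PackOptions(const ExamplePoolParams& params) {
  std::vector<uint8_t> bytes(sizeof(params));
  std::memcpy(bytes.data(), &params, sizeof(params));
  return bytes;
}

// Import side: view the bytes as the struct again. This is only safe when
// producer and consumer agree on the struct layout and ABI, which is the
// assumption the importer makes here.
const ExamplePoolParams* UnpackOptions(const std::vector<uint8_t>& bytes) {
  if (bytes.size() != sizeof(ExamplePoolParams)) return nullptr;
  return reinterpret_cast<const ExamplePoolParams*>(bytes.data());
}
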
+// Populates the array of mlir::NamedAttributes corresponding to the given
+// custom_options.
+// We use an out parameter per LLVM convention
+tensorflow::Status CustomOptionsToAttributes(
+    const std::string &op_name, const std::vector<uint8_t> &custom_options,
+    mlir::Builder builder,
+    // NOLINTNEXTLINE
+    Location loc, llvm::SmallVectorImpl<mlir::NamedAttribute> *attributes);
+
 }  // namespace mlir
 
 #endif  // TENSORFLOW_COMPILER_MLIR_LITE_FLATBUFFER_OPERATOR_H_
diff --git a/tensorflow/compiler/mlir/lite/flatbuffer_translate.cc b/tensorflow/compiler/mlir/lite/flatbuffer_translate.cc
index 3ed8eb87eb9..5abd37b22fa 100644
--- a/tensorflow/compiler/mlir/lite/flatbuffer_translate.cc
+++ b/tensorflow/compiler/mlir/lite/flatbuffer_translate.cc
@@ -41,21 +41,22 @@ limitations under the License.
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/ToolOutputFile.h"
-#include "mlir/Dialect/QuantOps/QuantTypes.h"  // TF:local_config_mlir
-#include "mlir/Dialect/StandardOps/Ops.h"  // TF:local_config_mlir
-#include "mlir/IR/Builders.h"  // TF:local_config_mlir
-#include "mlir/IR/Function.h"  // TF:local_config_mlir
-#include "mlir/IR/Location.h"  // TF:local_config_mlir
-#include "mlir/IR/MLIRContext.h"  // TF:local_config_mlir
-#include "mlir/IR/Module.h"  // TF:local_config_mlir
-#include "mlir/IR/Operation.h"  // TF:local_config_mlir
-#include "mlir/IR/StandardTypes.h"  // TF:local_config_mlir
-#include "mlir/IR/Types.h"  // TF:local_config_mlir
-#include "mlir/IR/Value.h"  // TF:local_config_mlir
-#include "mlir/Support/LogicalResult.h"  // TF:local_config_mlir
-#include "mlir/Translation.h"  // TF:local_config_mlir
+#include "mlir/Dialect/QuantOps/QuantTypes.h"  // TF:llvm-project
+#include "mlir/Dialect/StandardOps/Ops.h"  // TF:llvm-project
+#include "mlir/IR/Builders.h"  // TF:llvm-project
+#include "mlir/IR/Function.h"  // TF:llvm-project
+#include "mlir/IR/Location.h"  // TF:llvm-project
+#include "mlir/IR/MLIRContext.h"  // TF:llvm-project
+#include "mlir/IR/Module.h"  // TF:llvm-project
+#include "mlir/IR/Operation.h"  // TF:llvm-project
+#include "mlir/IR/StandardTypes.h"  // TF:llvm-project
+#include "mlir/IR/Types.h"  // TF:llvm-project
+#include "mlir/IR/Value.h"  // TF:llvm-project
+#include "mlir/Support/LogicalResult.h"  // TF:llvm-project
+#include "mlir/Translation.h"  // TF:llvm-project
 #include "tensorflow/compiler/mlir/lite/flatbuffer_operator.h"
 #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h"
+#include "tensorflow/compiler/mlir/lite/utils/convert_type.h"
 #include "tensorflow/compiler/mlir/lite/utils/stateful_ops_utils.h"
 #include "tensorflow/compiler/mlir/op_or_arg_name_mapper.h"
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h"
@@ -230,19 +231,19 @@ static bool IsConst(Operation* op) {
 }
 
 template <typename T>
-static bool HasValidTFLiteType(Value* value, T& error_handler) {
+static bool HasValidTFLiteType(Value value, T& error_handler) {
   // None type is allowed to represent unspecified operands.
- if (value->getType().isa()) return true; + if (value.getType().isa()) return true; - auto type = value->getType().dyn_cast(); + auto type = value.getType().dyn_cast(); if (!type) { - if (auto op = value->getDefiningOp()) { + if (auto op = value.getDefiningOp()) { error_handler.emitError() << '\'' << op << "' should produce value of tensor type instead of " - << value->getType(); + << value.getType(); return false; } - error_handler.emitError("expected tensor type, got ") << value->getType(); + error_handler.emitError("expected tensor type, got ") << value.getType(); return false; } @@ -279,9 +280,9 @@ static bool IsValidTFLiteMlirModule(ModuleOp module) { } auto& bb = fn.getBlocks().front(); - for (auto* arg : bb.getArguments()) { + for (auto arg : bb.getArguments()) { if (!HasValidTFLiteType(arg, fn)) - return fn.emitError("invalid TFLite type: ") << arg->getType(), false; + return fn.emitError("invalid TFLite type: ") << arg.getType(), false; } // Verify that all operations except the terminator have exactly one @@ -289,9 +290,9 @@ static bool IsValidTFLiteMlirModule(ModuleOp module) { for (auto& inst : bb) { if (inst.isKnownTerminator()) break; - for (auto* result : inst.getResults()) { + for (auto result : inst.getResults()) { if (!HasValidTFLiteType(result, inst)) - return fn.emitError("invalid TFLite type: ") << result->getType(), + return fn.emitError("invalid TFLite type: ") << result.getType(), false; } } @@ -361,7 +362,7 @@ class Translator { // Builds TFLite tensor from the given value. `buffer_idx` is index of the // corresponding buffer. Emits error and returns llvm::None on failure. - Optional> BuildTensor(Value* value, + Optional> BuildTensor(Value value, const std::string& name, unsigned buffer_idx); @@ -419,7 +420,7 @@ class Translator { bool IsStatefulOperand(mlir::Operation* op, int operand_index); // Returns a unique name for `val`. - std::string UniqueName(mlir::Value* val); + std::string UniqueName(mlir::Value val); ModuleOp module_; @@ -449,7 +450,7 @@ class Translator { std::vector failed_custom_ops_; }; -std::string Translator::UniqueName(mlir::Value* val) { +std::string Translator::UniqueName(mlir::Value val) { return name_mapper_.GetUniqueName(val); } @@ -502,8 +503,8 @@ Optional> Translator::BuildBuffer( } Optional> Translator::BuildTensor( - Value* value, const std::string& name, unsigned buffer_idx) { - auto type = value->getType().cast(); + Value value, const std::string& name, unsigned buffer_idx) { + auto type = value.getType().cast(); // TFLite requires tensor shape only for the inputs and constants. // However, we output all known shapes for better round-tripping @@ -515,7 +516,7 @@ Optional> Translator::BuildTensor( if (std::any_of(shape_ref.begin(), shape_ref.end(), is_out_of_range)) return mlir::emitError( - value->getLoc(), + value.getLoc(), "result shape dimensions out of 32 bit int type range"); return mlir::success(); @@ -527,7 +528,7 @@ Optional> Translator::BuildTensor( if (mlir::failed(check_shape(shape_ref))) return llvm::None; shape = std::vector(shape_ref.begin(), shape_ref.end()); - } else if (auto* inst = value->getDefiningOp()) { + } else if (auto* inst = value.getDefiningOp()) { if (IsConst(inst)) { // Const op can have a result of dynamic shaped type (e.g. due to constant // folding), but we can still derive the shape of a constant tensor for @@ -570,7 +571,7 @@ Optional> Translator::BuildTensor( // marked as a stateful. If so, set the tensor's is_variable as true // This is v1 ref variable semantics in the TFLite runtime. 
bool is_variable = false; - for (auto& use : value->getUses()) { + for (auto& use : value.getUses()) { is_variable = IsStatefulOperand(use.getOwner(), use.getOperandNumber()); if (is_variable) { break; @@ -669,6 +670,16 @@ Translator::CreateFlexBuilderWithNodeAttrs( case ::tensorflow::AttrValue::kS: flex_builder->String(key, attr.s()); break; + case ::tensorflow::AttrValue::kType: { + auto status_or_tfl_type = tflite::TfTypeToTflType(attr.type()); + if (status_or_tfl_type.ok()) { + flex_builder->Int(key, status_or_tfl_type.ValueOrDie()); + } else { + emitWarning(loc, "ignoring unsupported tensorflow type: ") + << std::to_string(attr.type()); + } + break; + } case ::tensorflow::AttrValue::kI: flex_builder->Int(key, attr.i()); break; @@ -906,13 +917,13 @@ Optional> Translator::BuildSubGraph(FuncOp fn) { bool has_input_attr = false; InitializeNamesFromAttribute(fn, &has_input_attr); std::vector> tensors; - llvm::DenseMap tensor_index_map; + llvm::DenseMap tensor_index_map; // Builds tensor and buffer for argument or operation result. Returns false // on failure. - auto build_tensor_and_buffer = [&](Value* value, const std::string& name) { + auto build_tensor_and_buffer = [&](Value value, const std::string& name) { // NoneType represents optional and may be skipped here. - if (value->getType().isa()) { + if (value.getType().isa()) { return true; } @@ -925,7 +936,7 @@ Optional> Translator::BuildSubGraph(FuncOp fn) { // make the Buffer empty apart from setting the buffer_idx=0 in the Tensor. // This does not seem to affect runtime behavior for RNN/LSTM, but would be // good for reducing memory footprint. - if (auto* inst = value->getDefiningOp()) { + if (auto* inst = value.getDefiningOp()) { auto buffer_or = BuildBuffer(inst); if (!buffer_or) return false; buffers_.push_back(*buffer_or); @@ -942,7 +953,7 @@ Optional> Translator::BuildSubGraph(FuncOp fn) { // have associated tensor and buffer. Build FlatBuffer tensor and buffer for // other functions. for (unsigned i = 0, e = bb.getNumArguments(); i < e; ++i) { - mlir::BlockArgument* arg = bb.getArgument(i); + mlir::BlockArgument arg = bb.getArgument(i); std::string name; if (has_input_attr) name = name_mapper_.GetUniqueName(arg); if (name.empty()) name = absl::StrCat("arg", i); @@ -964,15 +975,15 @@ Optional> Translator::BuildSubGraph(FuncOp fn) { // Fetch operand and result tensor indices. std::vector operands; operands.reserve(inst.getNumOperands()); - for (auto* operand : inst.getOperands()) { - if (operand->getType().isa()) + for (auto operand : inst.getOperands()) { + if (operand.getType().isa()) operands.push_back(kTfLiteOptionalTensor); else operands.push_back(tensor_index_map.lookup(operand)); } std::vector results; results.reserve(inst.getNumOperands()); - for (auto* result : inst.getResults()) { + for (auto result : inst.getResults()) { results.push_back(tensor_index_map.lookup(result)); } @@ -986,10 +997,10 @@ Optional> Translator::BuildSubGraph(FuncOp fn) { // Get input and output tensor indices for the subgraph. 
std::vector inputs, outputs; - for (auto* arg : bb.getArguments()) { + for (auto arg : bb.getArguments()) { inputs.push_back(tensor_index_map[arg]); } - for (auto* result : bb.getTerminator()->getOperands()) { + for (auto result : bb.getTerminator()->getOperands()) { outputs.push_back(tensor_index_map[result]); } diff --git a/tensorflow/compiler/mlir/lite/flatbuffer_translate.h b/tensorflow/compiler/mlir/lite/flatbuffer_translate.h index a69921c3b09..03f92ddbf03 100644 --- a/tensorflow/compiler/mlir/lite/flatbuffer_translate.h +++ b/tensorflow/compiler/mlir/lite/flatbuffer_translate.h @@ -18,14 +18,15 @@ limitations under the License. #include -#include "mlir/IR/Module.h" // TF:local_config_mlir +#include "mlir/IR/Module.h" // TF:llvm-project #include "tensorflow/compiler/mlir/op_or_arg_name_mapper.h" namespace tflite { // Translates the given MLIR `module` into a FlatBuffer and stores the -// serialized flatbuffer into the string. This uses OpLocNameMapper to convert -// location of the op to name in flatbuffer. +// serialized flatbuffer into the string. This uses OpOrArgLocNameMapper to +// convert location of the op to name in flatbuffer. Returns true if translation +// fails, otherwise returns false. bool MlirToFlatBufferTranslateFunction(mlir::ModuleOp module, std::string* serialized_flatbuffer, bool emit_builtin_tflite_ops, diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc b/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc index 221c9aa2adc..b72b519a724 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc @@ -25,17 +25,17 @@ limitations under the License. #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/FormatVariadic.h" -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Matchers.h" // TF:local_config_mlir -#include "mlir/IR/OpImplementation.h" // TF:local_config_mlir -#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/TypeUtilities.h" // TF:local_config_mlir -#include "mlir/Support/LLVM.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir -#include "mlir/Transforms/InliningUtils.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Matchers.h" // TF:llvm-project +#include "mlir/IR/OpImplementation.h" // TF:llvm-project +#include "mlir/IR/PatternMatch.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/TypeUtilities.h" // TF:llvm-project +#include "mlir/Support/LLVM.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project +#include "mlir/Transforms/InliningUtils.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" namespace mlir { @@ -301,14 +301,14 @@ Attribute ConstFoldUnaryOp(Type result_type, Attribute operand, return {}; } -void buildComparisonBinOp(Builder *builder, OperationState &result, Value *lhs, - Value *rhs) { +void buildComparisonBinOp(Builder *builder, OperationState &result, Value lhs, + Value rhs) { auto result_type = - OpTrait::util::getBroadcastedType(lhs->getType(), rhs->getType()); + OpTrait::util::getBroadcastedType(lhs.getType(), rhs.getType()); if 
(!result_type) emitError(result.location) - << "non-broadcastable operands: " << lhs->getType() << " and " - << rhs->getType(); + << "non-broadcastable operands: " << lhs.getType() << " and " + << rhs.getType(); result.addOperands({lhs, rhs}); // Comparison binary ops always return i1 tensor. if (auto shaped_type = result_type.dyn_cast()) { @@ -321,15 +321,15 @@ void buildComparisonBinOp(Builder *builder, OperationState &result, Value *lhs, } void buildFusedBroadcastableBinOp(Builder *builder, OperationState &result, - Value *lhs, Value *rhs, + Value lhs, Value rhs, StringAttr fused_activation_function) { auto result_type = - OpTrait::util::getBroadcastedType(lhs->getType(), rhs->getType()); + OpTrait::util::getBroadcastedType(lhs.getType(), rhs.getType()); if (!result_type) emitError(result.location) - << "non-broadcastable operands: " << lhs->getType() << " and " - << rhs->getType(); + << "non-broadcastable operands: " << lhs.getType() << " and " + << rhs.getType(); result.addOperands({lhs, rhs}); result.addAttribute("fused_activation_function", fused_activation_function); @@ -358,7 +358,7 @@ OpFoldResult AddOp::fold(ArrayRef operands) { namespace { int64_t GetConcatenationOpAxis(ConcatenationOp op) { - auto output_type = op.output()->getType().cast(); + auto output_type = op.output().getType().cast(); int64_t axis = op.axis().getSExtValue(); if (axis < 0) axis += output_type.getRank(); return axis; @@ -452,7 +452,7 @@ LogicalResult VerifyConcatenationOpTypes(Operation *op, } LogicalResult Verify(ConcatenationOp op) { - auto output_type = op.output()->getType().dyn_cast(); + auto output_type = op.output().getType().dyn_cast(); // If the output type is unranked, there is nothing else to be verified. if (!output_type) return success(); @@ -462,8 +462,8 @@ LogicalResult Verify(ConcatenationOp op) { return op.emitOpError("concatenation dimension must be in [-rank, rank)"); SmallVector operand_types; - for (Value *operand : op.values()) - operand_types.push_back(operand->getType().cast()); + for (Value operand : op.values()) + operand_types.push_back(operand.getType().cast()); return VerifyConcatenationOpTypes(op.getOperation(), output_type, operand_types, axis); @@ -520,7 +520,7 @@ DenseElementsAttr ConstFoldConcatenateOpDense(ArrayRef operands, OpFoldResult ConcatenationOp::fold(ArrayRef operands) { if (fused_activation_function() == "NONE") { - if (auto output_type = output()->getType().dyn_cast()) { + if (auto output_type = output().getType().dyn_cast()) { const int64_t axis = GetConcatenationOpAxis(*this); if (IsConcatenationOpConstFoldable(*this, operands, output_type, axis)) return ConstFoldConcatenateOpDense(operands, output_type, axis); @@ -528,9 +528,9 @@ OpFoldResult ConcatenationOp::fold(ArrayRef operands) { } // Remove all empty values. 
- SmallVector non_empty_values; - for (Value *value : this->values()) { - const auto shaped_type = value->getType().cast(); + SmallVector non_empty_values; + for (Value value : this->values()) { + const auto shaped_type = value.getType().cast(); if (shaped_type.hasStaticShape() && shaped_type.getNumElements() == 0) { continue; } @@ -559,8 +559,8 @@ OpFoldResult ConcatenationOp::fold(ArrayRef operands) { //===----------------------------------------------------------------------===// LogicalResult Verify(FullyConnectedOp op) { - ShapedType input_type = op.input()->getType().cast(); - ShapedType filter_type = op.filter()->getType().cast(); + ShapedType input_type = op.input().getType().cast(); + ShapedType filter_type = op.filter().getType().cast(); if (filter_type.hasRank() && filter_type.getRank() != 2) { return op.emitOpError("expect 2d filter, got ") << filter_type; } @@ -582,7 +582,7 @@ LogicalResult Verify(FullyConnectedOp op) { // format. if (op.weights_format() == "DEFAULT") { ShapedType output_type = - (*op.output().begin())->getType().cast(); + (*op.output().begin()).getType().cast(); if (!output_type.hasStaticShape()) { return mlir::success(); } @@ -609,9 +609,9 @@ LogicalResult Verify(FullyConnectedOp op) { //===----------------------------------------------------------------------===// static void BuildGatherOp(Builder *builder, OperationState &result, - Value *params, Value *indices, IntegerAttr axis) { - auto params_type = params->getType().cast(); - auto indices_type = indices->getType().cast(); + Value params, Value indices, IntegerAttr axis) { + auto params_type = params.getType().cast(); + auto indices_type = indices.getType().cast(); // If params/indices is unranked, then output is unranked. if (!params_type.hasRank() || !indices_type.hasRank()) @@ -704,8 +704,8 @@ static LogicalResult Verify(PackOp op) { if (op.getOperation()->getNumOperands() != op.values_count()) return op.emitOpError("input count should match 'values_count' attribute"); - Value *operand0 = op.getOperand(0); - auto input_type = operand0->getType().cast(); + Value operand0 = op.getOperand(0); + auto input_type = operand0.getType().cast(); // Check axis bounds. if (input_type.hasRank()) { @@ -717,8 +717,8 @@ static LogicalResult Verify(PackOp op) { // Make sure all inputs have the same shape and element type. // TODO(rahulsp): Simplify once b/135032064 is fixed. - for (Value *operand : op.getOperands()) { - auto other_type = operand->getType().cast(); + for (Value operand : op.getOperands()) { + auto other_type = operand.getType().cast(); if (input_type != other_type) return op.emitOpError("operands should be of the same type. 
got ") << input_type << ", " << other_type; @@ -732,9 +732,9 @@ static LogicalResult Verify(PackOp op) { //===----------------------------------------------------------------------===// static LogicalResult Verify(PReluOp op) { - auto input_type = op.input()->getType().cast(); - auto alpha_type = op.alpha()->getType().cast(); - auto output_type = op.output()->getType().cast(); + auto input_type = op.input().getType().cast(); + auto alpha_type = op.alpha().getType().cast(); + auto output_type = op.output().getType().cast(); if (input_type.hasStaticShape() && alpha_type.hasStaticShape()) { if (input_type.getRank() != alpha_type.getRank() + 1) { @@ -783,13 +783,13 @@ struct RemoveAdjacentReshape : public RewritePattern { PatternMatchResult match(Operation *op) const override { auto thisOp = cast(op); - auto prevOp = thisOp.getOperand(0)->getDefiningOp(); + auto prevOp = thisOp.getOperand(0).getDefiningOp(); return isa_and_nonnull(prevOp) ? matchSuccess() : matchFailure(); } void rewrite(Operation *op, PatternRewriter &rewriter) const override { auto thisOp = cast(op); - auto prevOp = cast(thisOp.getOperand(0)->getDefiningOp()); + auto prevOp = cast(thisOp.getOperand(0).getDefiningOp()); // Replace // %1 = "tfl.reshape"(%0, %shape0) @@ -807,7 +807,7 @@ struct RemoveAdjacentReshape : public RewritePattern { OpFoldResult ReshapeOp::fold(ArrayRef operands) { // Remove identity reshape with both static result and input shape. auto result_type = getType().cast(); - auto input_type = getOperand(0)->getType().cast(); + auto input_type = getOperand(0).getType().cast(); if (result_type.hasStaticShape() && result_type == input_type) { return getOperand(0); } @@ -865,7 +865,7 @@ struct RemoveRedundantUnpackPack : public RewritePattern { PatternMatchResult matchAndRewrite(Operation *op, PatternRewriter &rewriter) const override { TFL::PackOp pack_op = cast(op); - Operation *first_input = pack_op.getOperand(0)->getDefiningOp(); + Operation *first_input = pack_op.getOperand(0).getDefiningOp(); if (!first_input) return matchFailure(); auto input_unpack_op = dyn_cast_or_null(first_input); if (!input_unpack_op) return matchFailure(); @@ -880,8 +880,8 @@ struct RemoveRedundantUnpackPack : public RewritePattern { return matchFailure(); for (auto input_output : llvm::zip(pack_op.getOperands(), input_unpack_op.getResults())) { - Value *pack_input = std::get<0>(input_output); - Value *unpack_output = std::get<1>(input_output); + Value pack_input = std::get<0>(input_output); + Value unpack_output = std::get<1>(input_output); // Make sure the ordering is the same for the pack op & unpack op. 
if (pack_input != unpack_output) return matchFailure(); } @@ -905,9 +905,9 @@ void PackOp::getCanonicalizationPatterns(OwningRewritePatternList &results, //===----------------------------------------------------------------------===// static LogicalResult Verify(SliceOp op) { - auto input_type = op.input()->getType().cast(); - auto begin_type = op.begin()->getType().cast(); - auto size_type = op.size()->getType().cast(); + auto input_type = op.input().getType().cast(); + auto begin_type = op.begin().getType().cast(); + auto size_type = op.size().getType().cast(); if (input_type.hasStaticShape() && begin_type.hasStaticShape() && size_type.hasStaticShape()) { if (input_type.getRank() != begin_type.getNumElements()) { @@ -984,8 +984,8 @@ OpFoldResult SubOp::fold(ArrayRef operands) { // TopKOp //===----------------------------------------------------------------------===// -static void BuildTopKOp(Builder *builder, OperationState &result, Value *input, - Value *k) { +static void BuildTopKOp(Builder *builder, OperationState &result, Value input, + Value k) { // Output size is only known if k is constant value. A negative dimension is // considered dynamic so use -1 here if k is not a constant value. int const_k = -1; @@ -995,7 +995,7 @@ static void BuildTopKOp(Builder *builder, OperationState &result, Value *input, // TODO(jpienaar): This should use a helper function. const_k = cst.getValue({}).getValue().getSExtValue(); - auto val_type = input->getType().cast(); + auto val_type = input.getType().cast(); // If value is unranked, then so is results. if (!val_type.hasRank()) return TFL::TopKV2Op::build( @@ -1035,7 +1035,7 @@ struct DropFakeQuant : public RewritePattern { // If all the users of this op have valid "minmax" attributes, it is matched // and can be removed. auto fakeQuantOp = cast(op); - for (auto *operand : fakeQuantOp.getResult()->getUsers()) + for (auto *operand : fakeQuantOp.getResult().getUsers()) if (!HasValidMinMaxAttribute(operand)) return matchFailure(); return matchSuccess(); @@ -1075,7 +1075,7 @@ static LogicalResult Verify(UnpackOp op) { // Extracts and returns the signed integer constant in a 0-rank integer tensor // or 1-element 1-rank integer tensor if 'value' is a constant. -static llvm::Optional ExtractConstantIntFromTensor(Value *value) { +static llvm::Optional ExtractConstantIntFromTensor(Value value) { ElementsAttr attr; if (!matchPattern(value, m_Constant(&attr))) return {}; if (attr.getNumElements() != 1) return {}; @@ -1101,8 +1101,8 @@ static LogicalResult VerifySplitOpOutputTypes( ExpectedOutputTypeGetter get_expected_output_type) { for (int64_t i = 0; i < num_splits; ++i) { auto expected_output_type = get_expected_output_type(i); - Value *output = op->getResult(i); - auto output_type = output->getType().dyn_cast(); + Value output = op->getResult(i); + auto output_type = output.getType().dyn_cast(); if (!output_type || output_type != expected_output_type) return op->emitOpError() << "output #" << i << " should be " << expected_output_type; @@ -1121,7 +1121,7 @@ static LogicalResult Verify(SplitOp op) { if (!split_dim_opt) return success(); // If 'input' is not a ranked tensor, there are no other checks. - auto input_type = op.value()->getType().dyn_cast(); + auto input_type = op.value().getType().dyn_cast(); if (!input_type) return success(); int64_t split_dim = split_dim_opt.getValue(); @@ -1157,7 +1157,7 @@ static LogicalResult Verify(SplitVOp op) { if (!split_dim_opt) return success(); // If 'input' is not a ranked tensor, there are no other checks. 
- auto input_type = op.value()->getType().dyn_cast(); + auto input_type = op.value().getType().dyn_cast(); if (!input_type) return success(); int64_t split_dim = split_dim_opt.getValue(); @@ -1177,8 +1177,7 @@ static LogicalResult Verify(SplitVOp op) { return success(); if (size_splits_attr.getNumElements() != num_splits) { - auto size_splits_type = - op.size_splits()->getType().cast(); + auto size_splits_type = op.size_splits().getType().cast(); RankedTensorType expected_size_splits_type = RankedTensorType::get({num_splits}, size_splits_type.getElementType()); return op.emitOpError("'size_splits' should be ") @@ -1414,7 +1413,7 @@ OpFoldResult RankOp::fold(ArrayRef operands) { } // Also fold if `input` has a known rank. - auto input_type = input()->getType().cast(); + auto input_type = input().getType().cast(); // Do not fold if rank is zero because the TFLite converter doesn't // distinguish between unranked input and scalar input due to b/138865275. // TODO(b/138865275): Remove `input_type.getRank() != 0` in the following @@ -1438,6 +1437,56 @@ OpFoldResult ConstOp::fold(ArrayRef operands) { return value(); } +//===----------------------------------------------------------------------===// +// SelectV2Op +//===----------------------------------------------------------------------===// + +static void BuildSelectV2Op(Builder *builder, OperationState &result, + Value cond, Value x, Value y) { + auto operand_type = + OpTrait::util::getBroadcastedType(x.getType(), y.getType()); + + if (!operand_type) + emitError(result.location) << "non-broadcastable operands: " << x.getType() + << " and " << y.getType(); + + bool has_static_cond_shape = false; + bool has_static_operand_shape = false; + ArrayRef cond_shape; + ArrayRef operand_shape; + + if (auto shaped_type = cond.getType().dyn_cast()) { + if (shaped_type.hasStaticShape()) { + has_static_cond_shape = true; + cond_shape = shaped_type.getShape(); + } + } + if (auto shaped_type = operand_type.dyn_cast()) { + if (shaped_type.hasStaticShape()) { + has_static_operand_shape = true; + operand_shape = shaped_type.getShape(); + } + } + + SmallVector broadcastedShape; + if (has_static_cond_shape && has_static_operand_shape && + !OpTrait::util::getBroadcastedShape(cond_shape, operand_shape, + broadcastedShape)) { + emitError(result.location) << "non-broadcastable operands: " << operand_type + << " and " << cond.getType(); + } + + result.addOperands({cond, x, y}); + + auto elementType = x.getType().dyn_cast().getElementType(); + if (has_static_cond_shape && has_static_operand_shape) { + result.types.push_back( + RankedTensorType::get(broadcastedShape, elementType)); + } else { + result.types.push_back(UnrankedTensorType::get(elementType)); + } +} + //===----------------------------------------------------------------------===// // RangeOp //===----------------------------------------------------------------------===// @@ -1521,9 +1570,8 @@ OpFoldResult RangeOp::fold(ArrayRef operands) { //===----------------------------------------------------------------------===// static LogicalResult Verify(TransposeConvOp op) { - ShapedType output_type = op.output()->getType().cast(); - ShapedType output_shape_type = - op.output_shape()->getType().cast(); + ShapedType output_type = op.output().getType().cast(); + ShapedType output_shape_type = op.output_shape().getType().cast(); if (output_type.hasRank() && output_shape_type.hasStaticShape()) { if (output_type.getRank() != output_shape_type.getDimSize(0)) { return op.emitOpError(llvm::formatv( @@ -1629,9 +1677,9 @@ 
OpFoldResult TransposeOp::fold(ArrayRef operands) { } static LogicalResult Verify(TransposeOp op) { - auto input_type = op.x()->getType().cast(); - auto perm_type = op.perm()->getType().cast(); - auto output_type = op.y()->getType().cast(); + auto input_type = op.x().getType().cast(); + auto perm_type = op.perm().getType().cast(); + auto output_type = op.y().getType().cast(); if (input_type.hasStaticShape() && perm_type.hasStaticShape()) { if (perm_type.getNumElements() != input_type.getRank()) { return op.emitOpError( diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.h b/tensorflow/compiler/mlir/lite/ir/tfl_ops.h index 4fcfea7e9c7..c3c880d8cb6 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.h +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.h @@ -18,15 +18,15 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_MLIR_LITE_IR_TFL_OPS_H_ #define TENSORFLOW_COMPILER_MLIR_LITE_IR_TFL_OPS_H_ -#include "mlir/Dialect/QuantOps/QuantOps.h" // TF:local_config_mlir -#include "mlir/Dialect/Traits.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Dialect.h" // TF:local_config_mlir -#include "mlir/IR/OpImplementation.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/Support/Functional.h" // TF:local_config_mlir -#include "mlir/Support/LLVM.h" // TF:local_config_mlir +#include "mlir/Dialect/QuantOps/QuantOps.h" // TF:llvm-project +#include "mlir/Dialect/Traits.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Dialect.h" // TF:llvm-project +#include "mlir/IR/OpImplementation.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/Support/Functional.h" // TF:llvm-project +#include "mlir/Support/LLVM.h" // TF:llvm-project #include "tensorflow/compiler/mlir/lite/ir/tfl_traits.h" #include "tensorflow/compiler/mlir/lite/quantization/quantization_traits.h" #include "tensorflow/lite/schema/schema_generated.h" diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td index fdc256acf41..691264d32a4 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td @@ -135,7 +135,7 @@ def TFL_FpOrI32OrI64Tensor : TensorOf<[AnyFloat, TFL_Int32Or64]>; //===----------------------------------------------------------------------===// class TFL_OperandIsUnrankedPred : - CPred<"$_op.getOperand(" # n # ")->getType().isa()">; + CPred<"$_op.getOperand(" # n # ").getType().isa()">; // TODO: Some of these could be generalized and/or moved to more general // location. @@ -144,38 +144,38 @@ class TFL_OperandHasRank : PredOpTrait<"operand " # n # " is " # m # "-D", Or<[TFL_OperandIsUnrankedPred, CPred<"$_op.getOperand(" # n # - ")->getType().cast().getRank() == " # m>]>>; + ").getType().cast().getRank() == " # m>]>>; // Returns true if the n-th operand is ranked and has rank dim. class TFL_OperandHasKnownRank : And<[ - CPred<"$_op.getOperand(" # n # ")->getType().isa()">, - CPred<"$_op.getOperand(" # n # ")->getType().cast().getRank() == " + CPred<"$_op.getOperand(" # n # ").getType().isa()">, + CPred<"$_op.getOperand(" # n # ").getType().cast().getRank() == " # dim>]>; // True if operand n is ranked and has a rank > dim. 
class TFL_OperandIsRankedAndHasDimPred<int n, int dim> : And<[ - CPred<"$_op.getOperand(" # n # ")->getType().isa<RankedTensorType>()">, - CPred<"$_op.getOperand(" # n # ")->getType().cast<RankedTensorType>().getRank() > " + CPred<"$_op.getOperand(" # n # ").getType().isa<RankedTensorType>()">, + CPred<"$_op.getOperand(" # n # ").getType().cast<RankedTensorType>().getRank() > " # dim>]>; class TFL_OperandDimEquals<int n, int dim, int size> : And<[ TFL_OperandIsRankedAndHasDimPred<n, dim>, - CPred<"$_op.getOperand(" # n # ")->getType().cast<RankedTensorType>()" + CPred<"$_op.getOperand(" # n # ").getType().cast<RankedTensorType>()" ".getShape()[" # dim # " ] == " # size>]>; // Returns true if the n-th operand has unknown rank or at least rank m. class TFL_OperandHasAtleastRank<int n, int m> : PredOpTrait<"operand " # n # " is " # m # "-D", - Or<[CPred<"$_op.getOperand(" # n # ")->getType().isa<UnrankedTensorType>()">, + Or<[CPred<"$_op.getOperand(" # n # ").getType().isa<UnrankedTensorType>()">, CPred<"$_op.getOperand(" # n # - ")->getType().cast<RankedTensorType>().getRank() >= " # m>]>>; + ").getType().cast<RankedTensorType>().getRank() >= " # m>]>>; class TFL_OperandRankEquals1DimOfOperand<int x, int y> : PredOpTrait<"operand " # x # "'s rank equals operand " # y # "'s size", CPred<"$_op.getOperand(" # x # - ")->getType().cast<RankedTensorType>().getRank() == " + ").getType().cast<RankedTensorType>().getRank() == " "$_op.getOperand(" # y # - ")->getType().cast<RankedTensorType>().getShape()[0]">>; + ").getType().cast<RankedTensorType>().getShape()[0]">>; class TFL_Operand0DOr1ElementTensor<int x> : PredOpTrait<"operand #" # x # " is an 0-d tensor or 1-d tensor w/ 1 element", @@ -195,7 +195,7 @@ class TFL_OperandHasRankLessThan<int n, int m> : PredOpTrait<"operand " # n # " is maximum " # m # "-D", Or<[TFL_OperandIsUnrankedPred<n>, CPred<"$_op.getOperand(" # n # - ")->getType().cast<RankedTensorType>().getRank() <= " # m>]>>; + ").getType().cast<RankedTensorType>().getRank() <= " # m>]>>; // This is a quantization-aware version of TCresVTEtIsSameAsOp class TFL_TCresVTEtIsSameAsOp<int i, int j> : And<[ @@ -224,10 +224,10 @@ def BinaryOpSameElementTypeConstraint : //===----------------------------------------------------------------------===// def TFL_BroadcastableBinaryBuilder : OpBuilder< - "Builder *builder, OperationState &result, Value *lhs, Value *rhs", + "Builder *builder, OperationState &result, Value lhs, Value rhs", [{ auto resultType = - OpTrait::util::getBroadcastedType(lhs->getType(), rhs->getType()); + OpTrait::util::getBroadcastedType(lhs.getType(), rhs.getType()); if (!resultType) mlir::emitError(result.location, "non-broadcastable operands"); result.addOperands({lhs, rhs}); @@ -235,7 +235,7 @@ }]>; def TFL_FusedBroadcastableBinaryBuilder : OpBuilder< - "Builder *builder, OperationState &result, Value *lhs, Value *rhs, " + "Builder *builder, OperationState &result, Value lhs, Value rhs, " "StringAttr fusedActivationFunction", [{ buildFusedBroadcastableBinOp( @@ -243,7 +243,7 @@ }]>; def TFL_ComparisonBinaryBuilder : OpBuilder< - "Builder *builder, OperationState &result, Value *lhs, Value *rhs", + "Builder *builder, OperationState &result, Value lhs, Value rhs", [{ buildComparisonBinOp(builder, result, lhs, rhs); }]>; @@ -427,6 +427,33 @@ def TFL_TransposeConvOp: let verifier = [{ return Verify(*this); }]; } +def TFL_Convolution2DTransposeBiasOp : + Op { + let summary = "Transpose convolution with bias operator"; + + let description = [{ +Performs a transpose convolution operation on its inputs, +with the option of adding a bias. +Note this is a custom op that is not supported in the standard runtime.
+ + Inputs: + `inputs[0]`: required: the input activation tensor + `inputs[1]`: required: the filter weight tensor + `inputs[2]`: optional: the bias tensor + }]; + + let arguments = ( + ins AnyTensor:$input, + AnyTensor:$filter, + TFL_TensorOfOrNone<[AnyType]>:$bias, + TFL_PaddingAttr:$padding, + I32Attr:$stride_h, + I32Attr:$stride_w + ); + + let results = (outs AnyTensor:$output); +} + def TFL_AveragePool2DOp: TFL_Op<"average_pool_2d", [NoSideEffect, SameOperandsAndResultsScale]> { let summary = "Average_pool_2d operator"; @@ -471,7 +498,7 @@ def TFL_ArgMaxOp : TFL_Op<"arg_max", [NoSideEffect]> { let hasOptions = 1; DerivedTFLiteTypeAttr output_type = DerivedTFLiteTypeAttr<[{ - return getResult()->getType().cast<ShapedType>().getElementType(). + return getResult().getType().cast<ShapedType>().getElementType(). cast<IntegerType>().getWidth() > 32 ? tflite::TensorType_INT64 : tflite::TensorType_INT32; }]>; @@ -500,7 +527,7 @@ def TFL_ArgMinOp : TFL_Op<"arg_min", [NoSideEffect]> { let hasOptions = 1; DerivedTFLiteTypeAttr output_type = DerivedTFLiteTypeAttr<[{ - return getResult()->getType().cast<ShapedType>().getElementType(). + return getResult().getType().cast<ShapedType>().getElementType(). cast<IntegerType>().getWidth() > 32 ? tflite::TensorType_INT64 : tflite::TensorType_INT32; }]>; @@ -669,7 +696,7 @@ def TFL_GatherOp : TFL_Op<"gather", [ let builders = [ OpBuilder<"Builder *builder, OperationState &result, " - "Value *params, Value *indices, IntegerAttr axis", + "Value params, Value indices, IntegerAttr axis", [{ BuildGatherOp(builder, result, params, indices, axis); }]> ]; @@ -932,7 +959,7 @@ def TFL_NotEqualOp : TFL_Op<"not_equal", [ let builders = [ OpBuilder< - "Builder *builder, OperationState &result, Value *lhs, Value *rhs", + "Builder *builder, OperationState &result, Value lhs, Value rhs", [{ buildComparisonBinOp(builder, result, lhs, rhs); }]> @@ -1427,6 +1454,63 @@ def TFL_MaxPool2DOp : TFL_Op<"max_pool_2d", [ let customOption = "Pool2DOptions"; } +def TFL_MaxPoolingWithArgMax2DOp : + Op { + let summary = "Max Pool 2D with argmax op"; + + let description = [{ + Performs max pooling on the input and outputs both max values and indices. + Each index is a flattened index in a sub-array of "filter_w" x "filter_h" size. + Note this is a custom op that is not supported in the standard runtime. + + Inputs: + `inputs[0]`: required: the input activation tensor + }]; + + let arguments = ( + ins AnyTensor:$input, + TFL_PaddingAttr:$padding, + I32Attr:$stride_w, + I32Attr:$stride_h, + I32Attr:$filter_w, + I32Attr:$filter_h + ); + + let results = (outs + AnyTensor:$value, + AnyTensor:$indices + ); +} + +def TFL_MaxUnpooling2DOp : + Op { + let summary = "Max Unpool 2D"; + + let description = [{ + Performs a max unpooling operation. + To some extent this is the reverse operation of max pooling: + the elements in the input activation tensor are stored into the positions + specified by the input indices. + Note this is a custom op that is not supported in the standard runtime.
+ + Inputs: + `inputs[0]`: required: the input activation tensor + `inputs[1]`: required: the input indices + }]; + + let arguments = ( + ins AnyTensor:$input, + AnyTensor:$indices, + TFL_PaddingAttr:$padding, + I32Attr:$stride_w, + I32Attr:$stride_h, + I32Attr:$filter_w, + I32Attr:$filter_h + ); + + let results = (outs AnyTensor:$outputs); +} + def TFL_MaximumOp : TFL_Op<"maximum", [ Broadcastable, NoSideEffect, Commutative, SameOperandsAndResultsScale, TFL_OperandHasRankLessThan<0, 4>, TFL_OperandHasRankLessThan<1, 4>]> { @@ -1996,7 +2080,7 @@ def TFL_ShapeOp: TFL_Op<"shape", [NoSideEffect]> { let results = (outs AnyTensor:$output); DerivedTypeAttr out_type = DerivedTypeAttr<[{ - return getResult()->getType().cast<ShapedType>().getElementType(); + return getResult().getType().cast<ShapedType>().getElementType(); }]>; let hasOptions = 1; @@ -2081,9 +2165,9 @@ def TFL_SelectOp : TFL_Op<"select", [NoSideEffect, // TODO(jpienaar): autogenerate this. let builders = [OpBuilder<"Builder *builder, OperationState &result, " - "Value *condition, Value *x, Value *y", + "Value condition, Value x, Value y", [{ - auto resultType = x->getType(); + auto resultType = x.getType(); result.addOperands({condition, x, y}); result.types.push_back(resultType); }]>]; @@ -2091,6 +2175,32 @@ let hasOptions = 1; } +def TFL_SelectV2Op : TFL_Op<"select_v2", [NoSideEffect]> { + let summary = "SelectV2 operator"; + + let description = [{ + Selects values of 'x' if the corresponding value of 'condition' is true, or + the value of 'y' if false. There are two valid condition input sizes: + + 1. Either the same shape (in which case the select is elementwise), or + 2. Broadcastable shapes between 'condition', 'x' and 'y'. + }]; + + let arguments = (ins + TFL_BoolTensor:$condition, + TensorOf<[F32, I1, I8, I16, I32, I64, TFL_Uint8]>:$x, + TensorOf<[F32, I1, I8, I16, I32, I64, TFL_Uint8]>:$y); + let results = (outs AnyTensor:$output); + + let builders = [OpBuilder<"Builder *builder, OperationState &result, " + "Value cond, Value x, Value y", + [{ + BuildSelectV2Op(builder, result, cond, x, y); + }]>]; + + let hasOptions = 1; +} + def TFL_SinOp: TFL_Op<"sin", [ NoSideEffect, SameOperandsAndResultType, NoQuantizableResult]> { let summary = "Sine operator"; @@ -2277,7 +2387,7 @@ def TFL_TopKV2Op: TFL_Op<"topk_v2", [NoSideEffect, TFL_OperandHasRank<1,0>, I32Tensor:$indices); let builders = [OpBuilder<"Builder *builder, OperationState &result, " - "Value *input, Value *k", + "Value input, Value k", [{ BuildTopKOp(builder, result, input, k); }]>]; let hasOptions = 1; @@ -2333,14 +2443,14 @@ def TFL_UnpackOp : TFL_Op<"unpack", [NoSideEffect]> { }]; let arguments = (ins - TensorOf<[F32, I8, I32, QI8, QUI8]>:$input, + TensorOf<[F32, I1, I8, I32, QI8, QUI8]>:$input, I32Attr:$num, I32Attr:$axis ); let results = (outs - Variadic<TensorOf<[F32, I8, I32, QI8, QUI8]>>:$outputs + Variadic<TensorOf<[F32, I1, I8, I32, QI8, QUI8]>>:$outputs ); let verifier = [{ return Verify(*this); }]; @@ -2707,7 +2817,7 @@ in the unique output `y`. In other words: ); DerivedTFLiteTypeAttr idx_out_type = DerivedTFLiteTypeAttr<[{ - return getResult(1)->getType().cast<ShapedType>().getElementType(). + return getResult(1).getType().cast<ShapedType>().getElementType(). cast<IntegerType>().getWidth() > 32 ?
tflite::TensorType_INT64 : tflite::TensorType_INT32; }]>; diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_traits.h b/tensorflow/compiler/mlir/lite/ir/tfl_traits.h index 0ec63531658..c489dc825d0 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_traits.h +++ b/tensorflow/compiler/mlir/lite/ir/tfl_traits.h @@ -19,7 +19,7 @@ limitations under the License. #define TENSORFLOW_COMPILER_MLIR_LITE_IR_TFL_TRAITS_H_ #include "mlir/IR/OpDefinition.h" -#include "mlir/Support/LLVM.h" // TF:local_config_mlir +#include "mlir/Support/LLVM.h" // TF:llvm-project namespace mlir { namespace OpTrait { diff --git a/tensorflow/compiler/mlir/lite/mlir_tflite_runner.cc b/tensorflow/compiler/mlir/lite/mlir_tflite_runner.cc index eb840eeeeb4..3099cbeb1fa 100644 --- a/tensorflow/compiler/mlir/lite/mlir_tflite_runner.cc +++ b/tensorflow/compiler/mlir/lite/mlir_tflite_runner.cc @@ -30,10 +30,10 @@ limitations under the License. #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/Parser.h" // TF:local_config_mlir +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/Parser.h" // TF:llvm-project #include "tensorflow/compiler/mlir/lite/flatbuffer_translate.h" #include "tensorflow/compiler/mlir/lite/flatbuffer_translate_flags.h" #include "tensorflow/core/platform/init_main.h" diff --git a/tensorflow/compiler/mlir/lite/operator_converter_gen.cc b/tensorflow/compiler/mlir/lite/operator_converter_gen.cc index b2c125d7001..0f23cbefebd 100644 --- a/tensorflow/compiler/mlir/lite/operator_converter_gen.cc +++ b/tensorflow/compiler/mlir/lite/operator_converter_gen.cc @@ -27,7 +27,7 @@ limitations under the License. #include "llvm/TableGen/Main.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/TableGenBackend.h" -#include "mlir/TableGen/Attribute.h" // TF:local_config_mlir +#include "mlir/TableGen/Attribute.h" // TF:llvm-project using llvm::DefInit; using llvm::dyn_cast; diff --git a/tensorflow/compiler/mlir/lite/python/BUILD b/tensorflow/compiler/mlir/lite/python/BUILD index 8e2198c2a6a..98f840d3fe7 100644 --- a/tensorflow/compiler/mlir/lite/python/BUILD +++ b/tensorflow/compiler/mlir/lite/python/BUILD @@ -28,10 +28,10 @@ cc_library( "//tensorflow/lite/toco:toco_flags_proto_cc", "//tensorflow/lite/toco:types_proto_cc", "//tensorflow/stream_executor/lib", - "@llvm//:support", - "@local_config_mlir//:IR", - "@local_config_mlir//:Pass", - "@local_config_mlir//:Support", - "@local_config_mlir//:ViewOpGraph", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:Support", + "@llvm-project//mlir:ViewOpGraph", ], ) diff --git a/tensorflow/compiler/mlir/lite/python/graphdef_to_tfl_flatbuffer.cc b/tensorflow/compiler/mlir/lite/python/graphdef_to_tfl_flatbuffer.cc index e2ba0cb822b..4ea26ee2f06 100644 --- a/tensorflow/compiler/mlir/lite/python/graphdef_to_tfl_flatbuffer.cc +++ b/tensorflow/compiler/mlir/lite/python/graphdef_to_tfl_flatbuffer.cc @@ -19,11 +19,11 @@ limitations under the License. 
#include #include "llvm/Support/ToolOutputFile.h" -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Support/FileUtilities.h" // TF:local_config_mlir -#include "mlir/Transforms/ViewOpGraph.h" // TF:local_config_mlir +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Support/FileUtilities.h" // TF:llvm-project +#include "mlir/Transforms/ViewOpGraph.h" // TF:llvm-project #include "tensorflow/compiler/mlir/lite/common/tfl_pass_config.h" #include "tensorflow/compiler/mlir/lite/tf_tfl_passes.h" #include "tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.h" @@ -151,10 +151,9 @@ Status RegisterCustomBuiltinOps(const std::vector extra_tf_opdefs) { return errors::InvalidArgument("fail to parse extra OpDef"); } // Make sure the op is not already registered. If registered continue. - const OpRegistrationData* op_reg = nullptr; - auto status = - tensorflow::OpRegistry::Global()->LookUp(opdef.name(), &op_reg); - if (status.ok()) continue; + const OpRegistrationData* op_reg = + tensorflow::OpRegistry::Global()->LookUp(opdef.name()); + if (op_reg) continue; tensorflow::OpRegistry::Global()->Register( [opdef](tensorflow::OpRegistrationData* op_reg_data) -> Status { @@ -278,7 +277,6 @@ Status ConvertGraphDefToTFLiteFlatBuffer(const toco::ModelFlags& model_flags, auto status = ConvertTFExecutorToTFLOrFlatbuffer( module.get(), /*export_to_mlir=*/false, emit_builtin_tflite_ops, emit_select_tf_ops, emit_custom_ops, quant_specs, result, &pm); - if (toco_flags.has_dump_graphviz_dir()) { TF_RETURN_IF_ERROR(DumpOpGraphToFile( // rename once we enable the new converter feature flag. 
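The `RegisterCustomBuiltinOps` change above switches to the newer `OpRegistry::LookUp` overload that returns the registration data directly, so a null return now stands in for the old not-found `Status`. A minimal sketch of the resulting guard pattern, assuming TensorFlow's op-registry headers as used in the hunk (`RegisterIfMissing` is a hypothetical helper name, not part of this change):

```c++
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_def.pb.h"

// Hypothetical helper mirroring the hunk above: register `opdef` only if no
// op of that name is already present in the global registry.
tensorflow::Status RegisterIfMissing(const tensorflow::OpDef& opdef) {
  // The pointer-returning LookUp replaces the old (name, &out) + Status form;
  // a nullptr result simply means the op is not registered yet.
  const tensorflow::OpRegistrationData* op_reg =
      tensorflow::OpRegistry::Global()->LookUp(opdef.name());
  if (op_reg != nullptr) return tensorflow::Status::OK();

  tensorflow::OpRegistry::Global()->Register(
      [opdef](tensorflow::OpRegistrationData* op_reg_data) {
        *op_reg_data = tensorflow::OpRegistrationData(opdef);
        return tensorflow::Status::OK();
      });
  return tensorflow::Status::OK();
}
```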
diff --git a/tensorflow/compiler/mlir/lite/quantization/BUILD b/tensorflow/compiler/mlir/lite/quantization/BUILD index 4ef6ac0b0cb..7cc03adf543 100644 --- a/tensorflow/compiler/mlir/lite/quantization/BUILD +++ b/tensorflow/compiler/mlir/lite/quantization/BUILD @@ -13,7 +13,7 @@ package( package_group( name = "friends", - includes = ["@local_config_mlir//:subpackages"], + includes = ["//third_party/mlir:subpackages"], packages = ["//tensorflow/compiler/mlir/..."], ) @@ -26,8 +26,8 @@ filegroup( name = "quantization_td_files", srcs = [ "quantization.td", - "@local_config_mlir//:OpBaseTdFiles", - "@local_config_mlir//:QuantizationOpsTdFiles", + "@llvm-project//mlir:OpBaseTdFiles", + "@llvm-project//mlir:QuantizationOpsTdFiles", ], ) @@ -53,13 +53,13 @@ cc_library( "//tensorflow/core:lib_proto_parsing", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", - "@llvm//:support", - "@local_config_mlir//:Analysis", - "@local_config_mlir//:IR", - "@local_config_mlir//:Pass", - "@local_config_mlir//:QuantOps", - "@local_config_mlir//:StandardOps", - "@local_config_mlir//:Support", + "@llvm-project//llvm:support", + "@llvm-project//mlir:Analysis", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:QuantOps", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Support", ], alwayslink = 1, ) @@ -75,11 +75,11 @@ cc_library( ], deps = [ "@com_google_absl//absl/memory", - "@llvm//:support", - "@local_config_mlir//:IR", - "@local_config_mlir//:QuantOps", - "@local_config_mlir//:StandardOps", - "@local_config_mlir//:Support", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:QuantOps", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Support", # TODO(fengliuai): remove this dependence. "//tensorflow/compiler/mlir/lite:tensorflow_lite", "//tensorflow/core:lib_proto_parsing", @@ -97,7 +97,7 @@ cc_library( deps = [ "//tensorflow/core:protos_all_cc", "@com_google_absl//absl/strings", - "@llvm//:support", + "@llvm-project//llvm:support", ], ) @@ -107,8 +107,8 @@ tf_native_cc_binary( "tools/op_quant_spec_getters_gen.cc", ], deps = [ - "@llvm//:support", - "@llvm//:tablegen", - "@local_config_mlir//:TableGen", + "@llvm-project//llvm:support", + "@llvm-project//llvm:tablegen", + "@llvm-project//mlir:TableGen", ], ) diff --git a/tensorflow/compiler/mlir/lite/quantization/import_quant_stats_pass.cc b/tensorflow/compiler/mlir/lite/quantization/import_quant_stats_pass.cc index 0326d122c07..4c4d8f1d9a2 100644 --- a/tensorflow/compiler/mlir/lite/quantization/import_quant_stats_pass.cc +++ b/tensorflow/compiler/mlir/lite/quantization/import_quant_stats_pass.cc @@ -23,18 +23,18 @@ limitations under the License. 
#include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Regex.h" #include "llvm/Support/raw_ostream.h" -#include "mlir/Dialect/QuantOps/FakeQuantSupport.h" // TF:local_config_mlir -#include "mlir/Dialect/QuantOps/QuantOps.h" // TF:local_config_mlir -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/AffineExpr.h" // TF:local_config_mlir -#include "mlir/IR/AffineMap.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Support/Functional.h" // TF:local_config_mlir -#include "mlir/Support/LLVM.h" // TF:local_config_mlir +#include "mlir/Dialect/QuantOps/FakeQuantSupport.h" // TF:llvm-project +#include "mlir/Dialect/QuantOps/QuantOps.h" // TF:llvm-project +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/AffineExpr.h" // TF:llvm-project +#include "mlir/IR/AffineMap.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/PatternMatch.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Support/Functional.h" // TF:llvm-project +#include "mlir/Support/LLVM.h" // TF:llvm-project #include "tensorflow/compiler/mlir/lite/quantization/quantization_info.pb.h" #include "tensorflow/compiler/mlir/lite/quantization/quantization_passes.h" #include "tensorflow/compiler/mlir/tensorflow/utils/import_utils.h" @@ -70,16 +70,16 @@ class ImportQuantStatsPass : public FunctionPass { void ImportAsStatsOps(OpBuilder b, Operation *op, int index, const QuantParamsEntry &info); - void InsertStatsOpAtResult(OpBuilder b, Value *res, ElementsAttr layer_stats, + void InsertStatsOpAtResult(OpBuilder b, Value res, ElementsAttr layer_stats, ElementsAttr axis_stats, IntegerAttr axis); // If the index is out of range, this method returns false. Otherwise it // returns true if the value is a float tensor. bool IsQuantizableResult(Operation *op, int index) { if (index < 0 || index >= op->getNumResults()) return false; - Value *res = op->getResult(index); - return res->getType().isa() && - res->getType().cast().getElementType().isa(); + Value res = op->getResult(index); + return res.getType().isa() && + res.getType().cast().getElementType().isa(); } // A method to retrieve the name for the given op. 
@@ -117,13 +117,13 @@ bool ImportQuantStatsPass::ParseQuantStats(const std::string &stats_str) { return false; } -void ImportQuantStatsPass::InsertStatsOpAtResult(OpBuilder b, Value *res, +void ImportQuantStatsPass::InsertStatsOpAtResult(OpBuilder b, Value res, ElementsAttr layer_stats, ElementsAttr axis_stats, IntegerAttr axis) { auto stats_op = b.create(b.getUnknownLoc(), res, layer_stats, axis_stats, axis); - res->replaceAllUsesWith(stats_op); + res.replaceAllUsesWith(stats_op); stats_op.getOperation()->replaceUsesOfWith(stats_op, res); } diff --git a/tensorflow/compiler/mlir/lite/quantization/lite/BUILD b/tensorflow/compiler/mlir/lite/quantization/lite/BUILD index 880f5ae5210..d076911761f 100644 --- a/tensorflow/compiler/mlir/lite/quantization/lite/BUILD +++ b/tensorflow/compiler/mlir/lite/quantization/lite/BUILD @@ -9,7 +9,7 @@ package( package_group( name = "friends", - includes = ["@local_config_mlir//:subpackages"], + includes = ["//third_party/mlir:subpackages"], packages = [ "//learning/brain/experimental/mlir/...", "//tensorflow/lite/...", @@ -36,9 +36,9 @@ cc_library( "//tensorflow/lite/core/api", "//tensorflow/lite/schema:schema_fbs", "@com_google_absl//absl/strings", - "@llvm//:support", - "@local_config_mlir//:IR", - "@local_config_mlir//:Pass", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", ], ) @@ -53,6 +53,6 @@ tf_cc_binary( "//tensorflow/lite:framework", "//tensorflow/lite/schema:schema_fbs", "@com_google_absl//absl/strings", - "@llvm//:support", + "@llvm-project//llvm:support", ], ) diff --git a/tensorflow/compiler/mlir/lite/quantization/lite/quantize_model.cc b/tensorflow/compiler/mlir/lite/quantization/lite/quantize_model.cc index 97aa128653f..d00357be155 100644 --- a/tensorflow/compiler/mlir/lite/quantization/lite/quantize_model.cc +++ b/tensorflow/compiler/mlir/lite/quantization/lite/quantize_model.cc @@ -17,11 +17,11 @@ limitations under the License. #include "absl/strings/string_view.h" #include "llvm/ADT/SmallVector.h" -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassManager.h" // TF:local_config_mlir +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassManager.h" // TF:llvm-project #include "tensorflow/compiler/mlir/lite/common/tfl_pass_config.h" #include "tensorflow/compiler/mlir/lite/flatbuffer_import.h" #include "tensorflow/compiler/mlir/lite/flatbuffer_translate.h" diff --git a/tensorflow/compiler/mlir/lite/quantization/quantization_driver.cc b/tensorflow/compiler/mlir/lite/quantization/quantization_driver.cc index a145e75465e..3fd1ff2ac94 100644 --- a/tensorflow/compiler/mlir/lite/quantization/quantization_driver.cc +++ b/tensorflow/compiler/mlir/lite/quantization/quantization_driver.cc @@ -23,17 +23,17 @@ limitations under the License. 
#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "mlir/Dialect/QuantOps/QuantTypes.h" // TF:local_config_mlir -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Matchers.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/Value.h" // TF:local_config_mlir -#include "mlir/Support/LLVM.h" // TF:local_config_mlir +#include "mlir/Dialect/QuantOps/QuantTypes.h" // TF:llvm-project +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Matchers.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/Value.h" // TF:llvm-project +#include "mlir/Support/LLVM.h" // TF:llvm-project #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" #include "tensorflow/compiler/mlir/lite/ir/tfl_traits.h" #include "tensorflow/compiler/mlir/lite/quantization/quantization_traits.h" @@ -146,14 +146,14 @@ class QuantizationDriver { // Adds all the users of index-th result of op to the work list. void AddUserToList(Operation *op, int index) { - for (auto *user : op->getResult(index)->getUsers()) { + for (auto *user : op->getResult(index).getUsers()) { work_list_.push_back(user); } } // Adds the defining op of index-th operand of op to the work list. void AddOperandToList(Operation *op, int index) { - if (auto *inst = op->getOperand(index)->getDefiningOp()) { + if (auto *inst = op->getOperand(index).getDefiningOp()) { work_list_.push_back(inst); } } @@ -183,20 +183,20 @@ class QuantizationDriver { // of the op. void QuantizeOpResult(Operation *op, int index, QuantParams params); - void QuantizeArg(BlockArgument *arg, QuantParams params); + void QuantizeArg(BlockArgument arg, QuantParams params); // Inserts the Quantize and Dequantize ops to quantize the value and returns // the Quantize op. - void QuantizeValue(Value *value, QuantParams params, Location loc); + void QuantizeValue(Value value, QuantParams params, Location loc); // Inserts the Quantize ops for requantizing the index-th result of the op. void RequantizeOpResult(Operation *op, int index, RequantizeState *state); - void RequantizeArg(BlockArgument *arg, RequantizeState *state); + void RequantizeArg(BlockArgument arg, RequantizeState *state); // Inserts the Quantize and Dequantize ops to quantize the value and returns // the Quantize op. - void RequantizeValue(Value *value, RequantizeState *state, Location loc); + void RequantizeValue(Value value, RequantizeState *state, Location loc); // A heuristic to get the quantization parameter satisfies the same scale // constraints for the op. 
Returns an empty option if this quantization @@ -213,7 +213,7 @@ class QuantizationDriver { return states_[result_states_[{op, index}]]; } - QuantState &GetArgQuantState(BlockArgument *arg) { + QuantState &GetArgQuantState(BlockArgument arg) { return states_[arg_states_[arg]]; } @@ -227,7 +227,7 @@ class QuantizationDriver { return rescale_states_[result_states_[{op, index}]]; } - RequantizeState &GetArgRequantizeState(BlockArgument *arg) { + RequantizeState &GetArgRequantizeState(BlockArgument arg) { return rescale_states_[arg_states_[arg]]; } @@ -235,32 +235,45 @@ class QuantizationDriver { // `as_result` is true or index-th operand if `as_result` is false. The state // is immutable if the type is a quantized type. Returns the index of this // new state in the state vector. - int InitializeState(Operation *op, int index, Value *val, bool as_result); + int InitializeState(Operation *op, int index, Value val, bool as_result); + + // Sets the state of an argument. If this value is cached, uses the cached + // result without creating new entry in the state vector. Otherwise, allocate + // a new entry in the state vector. + void InitializeArgState(BlockArgument arg, Value in, + llvm::DenseMap *cache) { + auto cached = cache->insert({in, 0}); + if (!cached.second) { + arg_states_[arg] = cached.first->second; + return; + } + QuantParams params = + quant::QuantizedType::getQuantizedElementType(in.getType()); + bool immutable = !EmptyParams(params); + int next_state_index = states_.size(); + states_.push_back({params, immutable}); + arg_states_[arg] = next_state_index; + cached.first->second = next_state_index; + } // Sets the state of the index-th operand of the op. If this operand is // cached, uses the cached result without creating new entry in the state // vector. Otherwise, allocate a new entry in the state vector. - void InitializeOperandState(Operation *op, int index, Value *in, - llvm::DenseMap *cache, - bool is_argument) { + void InitializeOperandState(Operation *op, int index, Value in, + llvm::DenseMap *cache) { auto cached = cache->insert({in, 0}); if (!cached.second) { operand_states_.insert({{op, index}, cached.first->second}); return; } cached.first->second = InitializeState(op, index, in, /*as_result=*/false); - if (is_argument) { - auto *arg = llvm::cast(in); - arg_states_[arg] = cached.first->second; - args_.push_back(arg); - } } // Sets the state of the index-th result of the op. If this result is cached, // uses the cached result without creating new entry in the state vector. // Otherwise, allocate a new entry in the state vector. - void InitializeResultState(Operation *op, int index, Value *res, - llvm::DenseMap *cache) { + void InitializeResultState(Operation *op, int index, Value res, + llvm::DenseMap *cache) { auto cached = cache->insert({res, 0}); if (!cached.second) { result_states_.insert({{op, index}, cached.first->second}); @@ -279,7 +292,8 @@ class QuantizationDriver { // rest are weights. llvm::DenseSet weights_; - // The weights require narrow_range quantization. If the value of this map is + // The weights require narrow_range quantization. This map collects all the + // weight operands defined by the op quant spec. If the value of the entry is // positive, per-channel quantization is required. llvm::DenseMap optimized_weights_; @@ -300,11 +314,11 @@ class QuantizationDriver { // results and arguments. 
llvm::DenseMap operand_states_; llvm::DenseMap result_states_; - llvm::DenseMap arg_states_; + llvm::DenseMap arg_states_; // This vector is to preserve the arguments order, so the newly inserted // quantized ops for the arguments are deterministically ordered. - llvm::SmallVector args_; + llvm::SmallVector args_; OpQuantSpecGetter op_quant_spec_getter_; }; @@ -321,10 +335,10 @@ bool QuantizationDriver::IsQuantized(Operation *op) { return true; } -int QuantizationDriver::InitializeState(Operation *op, int index, Value *val, +int QuantizationDriver::InitializeState(Operation *op, int index, Value val, bool as_result) { QuantParams params = - quant::QuantizedType::getQuantizedElementType(val->getType()); + quant::QuantizedType::getQuantizedElementType(val.getType()); bool immutable = !EmptyParams(params); int next_state_index = states_.size(); states_.push_back({params, immutable}); @@ -338,7 +352,7 @@ int QuantizationDriver::InitializeState(Operation *op, int index, Value *val, bool QuantizationDriver::SetConstantResultParams(Operation *op) { ElementsAttr attr; - Value *res = op->getResult(0); + Value res = op->getResult(0); if (!matchPattern(res, m_Constant(&attr))) { return false; } @@ -362,7 +376,7 @@ bool QuantizationDriver::SetConstantResultParams(Operation *op) { } else { // per-tensor quantization weight final_type = GetUniformQuantizedTypeForWeight( - attr, /*symmetric=*/is_weight_with_per_channel_support, + attr, /*symmetric=*/is_weight && is_signed_, /*num_bits=*/8, is_signed_, /*narrow_range_=*/is_weight); } @@ -428,18 +442,18 @@ bool QuantizationDriver::SetOperandParams(Operation *op, int index, void QuantizationDriver::QuantizeOpResult(Operation *op, int index, QuantParams params) { builder_.setInsertionPoint(op->getBlock(), ++Block::iterator(op)); - Value *original_result = op->getResult(index); + Value original_result = op->getResult(index); QuantizeValue(original_result, params, op->getLoc()); } -void QuantizationDriver::QuantizeArg(BlockArgument *arg, QuantParams params) { - builder_.setInsertionPointToStart(arg->getOwner()); +void QuantizationDriver::QuantizeArg(BlockArgument arg, QuantParams params) { + builder_.setInsertionPointToStart(arg.getOwner()); QuantizeValue(arg, params, builder_.getUnknownLoc()); } -void QuantizationDriver::QuantizeValue(Value *value, QuantParams params, +void QuantizationDriver::QuantizeValue(Value value, QuantParams params, Location loc) { - Type expressed_type = value->getType(); + Type expressed_type = value.getType(); Type new_type = params.castFromExpressedType(expressed_type); // This value isn't an expressed type (float), skip. if (!new_type) return; @@ -451,7 +465,7 @@ void QuantizationDriver::QuantizeValue(Value *value, QuantParams params, quantize.output()); // `original_result` has a use to `quantize`, so this will replace that use // by the result of `dequantize`. 
Remember to reset that use afterwards - value->replaceAllUsesWith(dequantize); + value.replaceAllUsesWith(dequantize); quantize.getOperation()->replaceUsesOfWith(dequantize, value); } @@ -459,9 +473,9 @@ void QuantizationDriver::RequantizeOpResult(Operation *op, int index, RequantizeState *state) { if (state->pos == RequantizeState::NO_REQUANTIZE) return; builder_.setInsertionPointAfter(op); - Value *value = op->getResult(index); + Value value = op->getResult(index); if (state->pos == RequantizeState::ON_OUTPUT) { - Operation *user = value->getUses().begin().getUser(); + Operation *user = value.getUses().begin().getUser(); if (llvm::isa<TFL::QuantizeOp>(user)) { // The requantize op is inserted between `quantize` and `dequantize` ops. value = user->getResult(0); @@ -471,31 +485,31 @@ void QuantizationDriver::RequantizeOpResult(Operation *op, int index, RequantizeValue(value, state, op->getLoc()); } -void QuantizationDriver::RequantizeArg(BlockArgument *arg, +void QuantizationDriver::RequantizeArg(BlockArgument arg, RequantizeState *state) { - Value *value = arg; - builder_.setInsertionPointToStart(arg->getOwner()); - if (value->hasOneUse()) { - auto user = value->use_begin().getUser(); + Value value = arg; + builder_.setInsertionPointToStart(arg.getOwner()); + if (value.hasOneUse()) { + auto user = value.use_begin().getUser(); if (auto q = llvm::dyn_cast<TFL::QuantizeOp>(user)) { value = q.output(); - builder_.setInsertionPoint(arg->getOwner(), ++Block::iterator(user)); + builder_.setInsertionPoint(arg.getOwner(), ++Block::iterator(user)); } } RequantizeValue(value, state, builder_.getUnknownLoc()); } -void QuantizationDriver::RequantizeValue(Value *value, RequantizeState *state, +void QuantizationDriver::RequantizeValue(Value value, RequantizeState *state, Location loc) { Type new_type; if (state->pos == RequantizeState::ON_INPUT) { - Type expressed_type = value->getType(); + Type expressed_type = value.getType(); // The value needs to be requantized. A Quantize op will be created to use // it as the operand and replace its uses. new_type = state->params.castFromExpressedType(expressed_type); } else { Type expressed_type = - quant::QuantizedType::castToExpressedType(value->getType()); + quant::QuantizedType::castToExpressedType(value.getType()); if (!expressed_type) return; // The value needs to be requantized. A Quantize op will be created to use @@ -508,7 +522,7 @@ void QuantizationDriver::RequantizeValue(Value *value, RequantizeState *state, TypeAttr type_attr = TypeAttr::get(new_type); auto requantize_op = builder_.create<TFL::QuantizeOp>(loc, new_type, value, type_attr); - value->replaceAllUsesWith(requantize_op); + value.replaceAllUsesWith(requantize_op); requantize_op.getOperation()->replaceUsesOfWith(requantize_op, value); } @@ -586,10 +600,10 @@ void QuantizationDriver::PreprocessConstantOps() { auto type = cst.getType().dyn_cast<ShapedType>(); if (!type || !type.getElementType().isa<FloatType>()) return; - Value *value = cst.getResult(); + Value value = cst.getResult(); SmallVector<std::pair<Operation *, int>, 4> bias_users; bool used_as_weight = false; - for (auto &use : value->getUses()) { + for (auto &use : value.getUses()) { auto spec = GetQuantSpec(use.getOwner()); auto biases = spec->biases_params; Operation *user = use.getOwner(); @@ -629,7 +643,20 @@ } void QuantizationDriver::SetupAllStates() { - llvm::DenseMap<Value *, int> value_to_state; + llvm::DenseMap<Value, int> value_to_state; + + for (auto arg : fn_.getArguments()) { + args_.push_back(arg); + Value value = arg; + // If the argument is quantized, it should only have one user.
+ if (arg.hasOneUse()) { + auto user = value.use_begin().getUser(); + if (auto q = llvm::dyn_cast<TFL::QuantizeOp>(user)) { + value = q.output(); + } + } + InitializeArgState(arg, value, &value_to_state); + } fn_.walk([&](Operation *op) { if (op->isKnownTerminator() || @@ -638,26 +665,24 @@ work_list_.push_back(op); for (int i = 0, e = op->getNumOperands(); i != e; ++i) { - auto *operand = op->getOperand(i); - bool is_argument = true; - if (auto *inst = operand->getDefiningOp()) { + auto operand = op->getOperand(i); + if (auto *inst = operand.getDefiningOp()) { // If the operand comes from a tfl.dequantize op, we use the quantized // input of this tfl.dequantize op to set the state. if (auto dq = llvm::dyn_cast<TFL::DequantizeOp>(inst)) { operand = dq.input(); } - is_argument = false; } - InitializeOperandState(op, i, operand, &value_to_state, is_argument); + InitializeOperandState(op, i, operand, &value_to_state); } for (int res = 0, e = op->getNumResults(); res != e; ++res) { - auto *result = op->getResult(res); + Value result = op->getResult(res); // If the result has been quantized, it should only be used by a // tfl.quantize op. For this case, we use the quantized result to // create the state and mark it immutable. - if (result->hasOneUse()) { - auto user = result->use_begin().getUser(); + if (result.hasOneUse()) { + auto user = result.use_begin().getUser(); if (auto q = llvm::dyn_cast<TFL::QuantizeOp>(user)) { result = q.output(); } @@ -746,7 +771,7 @@ bool QuantizationDriver::PropagateParams() { } void QuantizationDriver::Finalize() { - for (auto *arg : args_) { + for (auto arg : args_) { auto &state = GetArgQuantState(arg); auto &requantize = GetArgRequantizeState(arg); if (state.IsEmpty() || diff --git a/tensorflow/compiler/mlir/lite/quantization/quantization_passes.h b/tensorflow/compiler/mlir/lite/quantization/quantization_passes.h index 56beb387370..58e9538045b 100644 --- a/tensorflow/compiler/mlir/lite/quantization/quantization_passes.h +++ b/tensorflow/compiler/mlir/lite/quantization/quantization_passes.h @@ -16,8 +16,8 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_MLIR_LITE_QUANTIZATION_QUANTIZATION_PASSES_H_ #define TENSORFLOW_COMPILER_MLIR_LITE_QUANTIZATION_QUANTIZATION_PASSES_H_ -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassManager.h" // TF:local_config_mlir +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassManager.h" // TF:llvm-project namespace mlir { namespace quant { diff --git a/tensorflow/compiler/mlir/lite/quantization/quantization_traits.h b/tensorflow/compiler/mlir/lite/quantization/quantization_traits.h index 3830d11afe4..ea278344dec 100644 --- a/tensorflow/compiler/mlir/lite/quantization/quantization_traits.h +++ b/tensorflow/compiler/mlir/lite/quantization/quantization_traits.h @@ -18,8 +18,8 @@ limitations under the License.
#ifndef TENSORFLOW_COMPILER_MLIR_LITE_QUANTIZATION_QUANTIZATION_TRAITS_H_ #define TENSORFLOW_COMPILER_MLIR_LITE_QUANTIZATION_QUANTIZATION_TRAITS_H_ -#include "mlir/Dialect/QuantOps/QuantTypes.h" // TF:local_config_mlir -#include "mlir/Support/LLVM.h" // TF:local_config_mlir +#include "mlir/Dialect/QuantOps/QuantTypes.h" // TF:llvm-project +#include "mlir/Support/LLVM.h" // TF:llvm-project namespace mlir { namespace OpTrait { @@ -70,7 +70,7 @@ class FixedResultUniformScale { QuantizedType GetResultQuantizedType(int index) { auto op = this->getOperation(); auto result_type = - op->getResult(index)->getType().template cast(); + op->getResult(index).getType().template cast(); Builder builder(op->getContext()); IntegerType storage_type = builder.getIntegerType(BitWidth); const double scale = static_cast(ScaleMantissa) * diff --git a/tensorflow/compiler/mlir/lite/quantization/quantization_utils.cc b/tensorflow/compiler/mlir/lite/quantization/quantization_utils.cc index ca10809be69..5ff4ffa4b92 100644 --- a/tensorflow/compiler/mlir/lite/quantization/quantization_utils.cc +++ b/tensorflow/compiler/mlir/lite/quantization/quantization_utils.cc @@ -21,15 +21,15 @@ limitations under the License. #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" -#include "mlir/Dialect/QuantOps/FakeQuantSupport.h" // TF:local_config_mlir -#include "mlir/Dialect/QuantOps/QuantOps.h" // TF:local_config_mlir -#include "mlir/Dialect/QuantOps/QuantTypes.h" // TF:local_config_mlir -#include "mlir/Dialect/QuantOps/QuantizeUtils.h" // TF:local_config_mlir -#include "mlir/Dialect/QuantOps/UniformSupport.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/Support/LLVM.h" // TF:local_config_mlir +#include "mlir/Dialect/QuantOps/FakeQuantSupport.h" // TF:llvm-project +#include "mlir/Dialect/QuantOps/QuantOps.h" // TF:llvm-project +#include "mlir/Dialect/QuantOps/QuantTypes.h" // TF:llvm-project +#include "mlir/Dialect/QuantOps/QuantizeUtils.h" // TF:llvm-project +#include "mlir/Dialect/QuantOps/UniformSupport.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/Support/LLVM.h" // TF:llvm-project #include "tensorflow/compiler/mlir/lite/utils/attribute_utils.h" namespace mlir { @@ -367,7 +367,7 @@ ElementsAttr Quantize(Attribute real_value, Type tensor_type) { static bool PreferResultScale(Operation* op) { int float_operands = 0; for (auto operand : op->getOperands()) { - if (auto operand_type = operand->getType().dyn_cast()) { + if (auto operand_type = operand.getType().dyn_cast()) { if (operand_type.getElementType().isa()) { if (float_operands++ > 1) return true; } @@ -400,22 +400,22 @@ bool RemoveRedundantStatsOps(mlir::FuncOp func, quant::StatisticsOp stats_op = all_stats_ops.back(); all_stats_ops.pop_back(); - if (auto def = stats_op.arg()->getDefiningOp()) { + if (auto def = stats_op.arg().getDefiningOp()) { if (IsStatsRedundant(def, op_quant_spec_getter)) { redundant_stats_ops.insert(stats_op); } } - for (auto user : stats_op.getResult()->getUsers()) { + for (auto user : stats_op.getResult().getUsers()) { // We don't propagate this parameter down if it has multiple operands. // We want to use the result parameter scales instead. 
if (user->hasTrait() && !PreferResultScale(user)) { - for (Value* res : user->getResults()) { - if (res->hasOneUse()) { + for (Value res : user->getResults()) { + if (res.hasOneUse()) { if (auto next_stats = llvm::dyn_cast( - *res->getUsers().begin())) { + *res.getUsers().begin())) { // quantization parameters can be propagated to next_stats redundant_stats_ops.insert(next_stats); // add next_stats to the work list so propagation can @@ -440,12 +440,12 @@ bool RemoveRedundantStatsOps(mlir::FuncOp func, quant::StatisticsOp stats_op = all_stats_ops.back(); all_stats_ops.pop_back(); - if (auto def = stats_op.arg()->getDefiningOp()) { + if (auto def = stats_op.arg().getDefiningOp()) { if (def->hasTrait() && PreferResultScale(def)) { for (auto input : def->getOperands()) { if (auto next_stats = llvm::dyn_cast_or_null( - input->getDefiningOp())) { + input.getDefiningOp())) { redundant_stats_ops.insert(next_stats); all_stats_ops.push_back(next_stats); } @@ -458,7 +458,7 @@ bool RemoveRedundantStatsOps(mlir::FuncOp func, for (auto it : redundant_stats_ops) { if (!llvm::isa(it)) return true; auto stats_op = llvm::cast(it); - stats_op.getResult()->replaceAllUsesWith(stats_op.arg()); + stats_op.getResult().replaceAllUsesWith(stats_op.arg()); stats_op.erase(); } diff --git a/tensorflow/compiler/mlir/lite/quantization/quantization_utils.h b/tensorflow/compiler/mlir/lite/quantization/quantization_utils.h index 0f7ec91ebc6..60fc2add989 100644 --- a/tensorflow/compiler/mlir/lite/quantization/quantization_utils.h +++ b/tensorflow/compiler/mlir/lite/quantization/quantization_utils.h @@ -23,18 +23,18 @@ limitations under the License. #include "llvm/ADT/SmallVector.h" #include "llvm/Support/raw_ostream.h" -#include "mlir/Dialect/QuantOps/FakeQuantSupport.h" // TF:local_config_mlir -#include "mlir/Dialect/QuantOps/QuantOps.h" // TF:local_config_mlir -#include "mlir/Dialect/QuantOps/QuantTypes.h" // TF:local_config_mlir -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/BlockAndValueMapping.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Matchers.h" // TF:local_config_mlir -#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/Support/LLVM.h" // TF:local_config_mlir +#include "mlir/Dialect/QuantOps/FakeQuantSupport.h" // TF:llvm-project +#include "mlir/Dialect/QuantOps/QuantOps.h" // TF:llvm-project +#include "mlir/Dialect/QuantOps/QuantTypes.h" // TF:llvm-project +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/BlockAndValueMapping.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Matchers.h" // TF:llvm-project +#include "mlir/IR/PatternMatch.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/Support/LLVM.h" // TF:llvm-project #include "tensorflow/compiler/mlir/lite/quantization/quantization_traits.h" namespace mlir { @@ -116,7 +116,7 @@ struct ConvertStatsToQDQs : public OpRewritePattern { auto q = rewriter.create(op.getLoc(), result_type, op.arg(), TypeAttr::get(result_type)); auto dq = rewriter.create(op.getLoc(), op.getType(), q); - op.getResult()->replaceAllUsesWith(dq); + op.getResult().replaceAllUsesWith(dq); 
q.getOperation()->replaceUsesOfWith(dq, op.arg()); op.erase(); @@ -161,8 +161,8 @@ struct QuantizationPattern : public RewritePattern { if (op->getNumResults() != 1) { return matchFailure(); } - Value* quantized_value = op->getResult(0); - for (Operation* quantized_op : quantized_value->getUsers()) { + Value quantized_value = op->getResult(0); + for (Operation* quantized_op : quantized_value.getUsers()) { // If it is requantize op, we shouldn't rewrite this op. if (llvm::isa(quantized_op) || llvm::isa(quantized_op)) { return matchFailure(); @@ -176,17 +176,17 @@ struct QuantizationPattern : public RewritePattern { // Collect all the quantized inputs and "clone" the matched op by these // inputs. - SmallVector inputs; + SmallVector inputs; inputs.reserve(quantized_op->getNumOperands()); for (auto operand : quantized_op->getOperands()) { - Type operand_type = operand->getType(); + Type operand_type = operand.getType(); if (operand_type.isa()) { inputs.push_back(operand); continue; } - auto ele_type = operand->getType().cast().getElementType(); - if (auto op_inst = dyn_cast_or_null(operand->getDefiningOp())) { + auto ele_type = operand.getType().cast().getElementType(); + if (auto op_inst = dyn_cast_or_null(operand.getDefiningOp())) { inputs.push_back(op_inst.input()); } else if (ele_type.isa()) { // If the operand is an integer tensor, then it doesn't require the @@ -201,13 +201,13 @@ struct QuantizationPattern : public RewritePattern { // Collect all the quantized outputs and replace them by the results of // the new quantized op. - llvm::SmallDenseMap outputs_replaced; + llvm::SmallDenseMap outputs_replaced; SmallVector output_types; output_types.reserve(quantized_op->getNumResults()); for (auto enumerated_result : llvm::enumerate(quantized_op->getResults())) { - Value* result = enumerated_result.value(); - Type result_type = result->getType(); + Value result = enumerated_result.value(); + Type result_type = result.getType(); // Add this to the test coverage once we create test ops with none type // results. if (result_type.isa()) { @@ -216,20 +216,20 @@ struct QuantizationPattern : public RewritePattern { continue; } Type result_ele_type = - result->getType().cast().getElementType(); + result.getType().cast().getElementType(); // If the user is the Quantize op, it must be the only user. - if (result->hasOneUse() && llvm::isa(*result->user_begin())) { - auto user = llvm::cast(*result->user_begin()); + if (result.hasOneUse() && llvm::isa(*result.user_begin())) { + auto user = llvm::cast(*result.user_begin()); outputs_replaced.insert({user.output(), enumerated_result.index()}); output_types.push_back(user.getType()); } else if (result_ele_type.template isa()) { // If the result is an integer tensor, then it doesn't require the // D op in the pattern. 
outputs_replaced.insert({result, enumerated_result.index()}); - output_types.push_back(result->getType()); + output_types.push_back(result.getType()); } else if (static_cast(this)->AllowHybridResult()) { outputs_replaced.insert({result, enumerated_result.index()}); - output_types.push_back(result->getType()); + output_types.push_back(result.getType()); } else { return matchFailure(); } @@ -241,7 +241,7 @@ struct QuantizationPattern : public RewritePattern { output_types, quantized_op->getAttrs()); Operation* new_op = rewriter.createOperation(new_state); for (auto output : outputs_replaced) { - output.getFirst()->replaceAllUsesWith( + output.getFirst().replaceAllUsesWith( new_op->getResult(output.getSecond())); } @@ -252,7 +252,7 @@ struct QuantizationPattern : public RewritePattern { // For constant operands, the floating-point constant is duplicated in // case it is quantized. for (int i = 0, e = new_op->getNumOperands(); i != e; ++i) { - auto def = new_op->getOperand(i)->getDefiningOp(); + auto def = new_op->getOperand(i).getDefiningOp(); if (auto q = llvm::dyn_cast_or_null(def)) { DenseFPElementsAttr attr; if (!matchPattern(q.input(), m_Constant(&attr))) { @@ -265,7 +265,7 @@ struct QuantizationPattern : public RewritePattern { for (int i = 0, e = new_op->getNumResults(); i != e; ++i) { if (!quantized_op->getResult(i) - ->getType() + .getType() .cast() .getElementType() .isa()) { @@ -283,13 +283,13 @@ struct QuantizationPattern : public RewritePattern { // Find the Dequantize/Dequantize users of the new op results, and // replace the usage. Then all the floating-point ops are connected. // N.B. the return op will use this floating-point result. - for (auto user : new_op->getResult(i)->getUsers()) { + for (auto user : new_op->getResult(i).getUsers()) { // Skip the Requantize op, and we know it has a single user. 
if (llvm::isa(user)) { - user = *user->getResult(0)->getUsers().begin(); + user = *user->getResult(0).getUsers().begin(); } if (auto dequantize = llvm::dyn_cast(user)) { - dequantize.getResult()->replaceAllUsesWith( + dequantize.getResult().replaceAllUsesWith( quantized_op->getResult(i)); } } @@ -316,7 +316,7 @@ struct ConvertUnsignedToSigned : public OpRewritePattern { PatternMatchResult matchAndRewrite(Q op, PatternRewriter& rewriter) const override { - Type output_type = op.output()->getType(); + Type output_type = op.output().getType(); auto qtype = QType::getQuantizedElementType(output_type); if (!qtype || qtype.isSigned()) return this->matchFailure(); diff --git a/tensorflow/compiler/mlir/lite/quantization/tests/BUILD b/tensorflow/compiler/mlir/lite/quantization/tests/BUILD index 9f47185e90a..4faa8d2efe8 100644 --- a/tensorflow/compiler/mlir/lite/quantization/tests/BUILD +++ b/tensorflow/compiler/mlir/lite/quantization/tests/BUILD @@ -4,7 +4,7 @@ package(licenses = ["notice"]) glob_lit_tests( data = [":test_utilities"], - driver = "@local_config_mlir//:run_lit.sh", + driver = "@llvm-project//mlir:run_lit.sh", test_file_exts = ["mlir"], ) @@ -14,6 +14,6 @@ filegroup( testonly = True, data = [ "//tensorflow/compiler/mlir:tf-opt", - "@llvm//:FileCheck", + "@llvm-project//llvm:FileCheck", ], ) diff --git a/tensorflow/compiler/mlir/lite/quantization/tools/op_quant_spec_getters_gen.cc b/tensorflow/compiler/mlir/lite/quantization/tools/op_quant_spec_getters_gen.cc index bc49b0df23f..abc38505abd 100644 --- a/tensorflow/compiler/mlir/lite/quantization/tools/op_quant_spec_getters_gen.cc +++ b/tensorflow/compiler/mlir/lite/quantization/tools/op_quant_spec_getters_gen.cc @@ -20,7 +20,7 @@ limitations under the License. #include "llvm/TableGen/Main.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/TableGenBackend.h" -#include "mlir/TableGen/Operator.h" // TF:local_config_mlir +#include "mlir/TableGen/Operator.h" // TF:llvm-project using llvm::LessRecord; using llvm::raw_ostream; @@ -36,7 +36,7 @@ using mlir::tblgen::Operator; // NOLINTNEXTLINE static bool OpQuantSpecWriter(raw_ostream &os, RecordKeeper &records) { llvm::Regex acc_uniform_trait_regex{"AccumulatorUniformScale<([0-9]*),"}; - llvm::Regex coeff_index_trait_regex{"AffineOpCoefficient<([0-9]*),"}; + llvm::Regex coeff_index_trait_regex{"AffineOpCoefficient<(-?[0-9]*),"}; llvm::Regex fixed_uniform_trait_regex{ "FixedResultUniformScale<([0-9]+).*(true|false)>"}; emitSourceFileHeader("Generated Ops Quant Spec Getters", os); diff --git a/tensorflow/compiler/mlir/lite/tests/BUILD b/tensorflow/compiler/mlir/lite/tests/BUILD index 9f47185e90a..4faa8d2efe8 100644 --- a/tensorflow/compiler/mlir/lite/tests/BUILD +++ b/tensorflow/compiler/mlir/lite/tests/BUILD @@ -4,7 +4,7 @@ package(licenses = ["notice"]) glob_lit_tests( data = [":test_utilities"], - driver = "@local_config_mlir//:run_lit.sh", + driver = "@llvm-project//mlir:run_lit.sh", test_file_exts = ["mlir"], ) @@ -14,6 +14,6 @@ filegroup( testonly = True, data = [ "//tensorflow/compiler/mlir:tf-opt", - "@llvm//:FileCheck", + "@llvm-project//llvm:FileCheck", ], ) diff --git a/tensorflow/compiler/mlir/lite/tests/debuginfo/BUILD b/tensorflow/compiler/mlir/lite/tests/debuginfo/BUILD index 2498a106f8f..5ef392b0ea0 100644 --- a/tensorflow/compiler/mlir/lite/tests/debuginfo/BUILD +++ b/tensorflow/compiler/mlir/lite/tests/debuginfo/BUILD @@ -7,10 +7,12 @@ glob_lit_tests( ":debug_info_files", ":test_utilities", ], - driver = "@local_config_mlir//:run_lit.sh", + driver = 
"@llvm-project//mlir:run_lit.sh", test_file_exts = [ "pbtxt", - "py", + # TODO(fengliuai): reenable these tests after the fused loc is + # supported in the diagnostic handler. + # "py", ], ) @@ -31,8 +33,8 @@ filegroup( ":saved_model_error", "//tensorflow/compiler/mlir/lite:flatbuffer_to_string", "//tensorflow/compiler/mlir/lite:tf_tfl_translate", - "@llvm//:FileCheck", - "@llvm//:not", + "@llvm-project//llvm:FileCheck", + "@llvm-project//llvm:not", ], ) diff --git a/tensorflow/compiler/mlir/lite/tests/debuginfo/concrete_function_error.py b/tensorflow/compiler/mlir/lite/tests/debuginfo/concrete_function_error.py index 0bb386f4829..7fe587095b6 100644 --- a/tensorflow/compiler/mlir/lite/tests/debuginfo/concrete_function_error.py +++ b/tensorflow/compiler/mlir/lite/tests/debuginfo/concrete_function_error.py @@ -21,6 +21,7 @@ from __future__ import division from __future__ import print_function import sys + from absl import app import tensorflow.compat.v2 as tf diff --git a/tensorflow/compiler/mlir/lite/tests/debuginfo/saved_model_error.py b/tensorflow/compiler/mlir/lite/tests/debuginfo/saved_model_error.py index a4011226f14..fa35d229bc4 100644 --- a/tensorflow/compiler/mlir/lite/tests/debuginfo/saved_model_error.py +++ b/tensorflow/compiler/mlir/lite/tests/debuginfo/saved_model_error.py @@ -21,6 +21,7 @@ from __future__ import division from __future__ import print_function import sys + from absl import app import tensorflow.compat.v2 as tf diff --git a/tensorflow/compiler/mlir/lite/tests/end2end/BUILD b/tensorflow/compiler/mlir/lite/tests/end2end/BUILD index a15b434571c..732fd784bbc 100644 --- a/tensorflow/compiler/mlir/lite/tests/end2end/BUILD +++ b/tensorflow/compiler/mlir/lite/tests/end2end/BUILD @@ -7,7 +7,7 @@ glob_lit_tests( ":quant_stats_files", ":test_utilities", ], - driver = "@local_config_mlir//:run_lit.sh", + driver = "@llvm-project//mlir:run_lit.sh", test_file_exts = [ "pbtxt", ], @@ -20,7 +20,7 @@ filegroup( data = [ "//tensorflow/compiler/mlir/lite:flatbuffer_to_string", "//tensorflow/compiler/mlir/lite:tf_tfl_translate", - "@llvm//:FileCheck", + "@llvm-project//llvm:FileCheck", ], ) diff --git a/tensorflow/compiler/mlir/lite/tests/end2end/custom_opdef.pbtxt b/tensorflow/compiler/mlir/lite/tests/end2end/custom_opdef.pbtxt index 0fcee7d7e8f..80452715b78 100644 --- a/tensorflow/compiler/mlir/lite/tests/end2end/custom_opdef.pbtxt +++ b/tensorflow/compiler/mlir/lite/tests/end2end/custom_opdef.pbtxt @@ -38,6 +38,6 @@ versions { # CHECK: func @main(%arg0: tensor<4xi32>, %arg1: tensor<4xi32>) -> tensor<*xi32> # CHECK: attributes {tf.entry_function = {inputs = "input0,input1", outputs = "output"}} { -# CHECK-NEXT: %0 = "tf.BannaPotatoSaladWithColeslaw"(%arg0, %arg1) {T = i32, device = "", name = "output"} : (tensor<4xi32>, tensor<4xi32>) -> tensor<*xi32> +# CHECK-NEXT: %0 = "tf.BannaPotatoSaladWithColeslaw"(%arg0, %arg1) {T = i32, device = ""} : (tensor<4xi32>, tensor<4xi32>) -> tensor<*xi32> # CHECK-NEXT: return %0 : tensor<*xi32> # CHECK-NEXT: } diff --git a/tensorflow/compiler/mlir/lite/tests/flatbuffer2mlir/BUILD b/tensorflow/compiler/mlir/lite/tests/flatbuffer2mlir/BUILD index 87caef0237e..b52b766a10d 100644 --- a/tensorflow/compiler/mlir/lite/tests/flatbuffer2mlir/BUILD +++ b/tensorflow/compiler/mlir/lite/tests/flatbuffer2mlir/BUILD @@ -8,7 +8,7 @@ glob_lit_tests( ":extra_files", ":test_utilities", ], - driver = "@local_config_mlir//:run_lit.sh", + driver = "@llvm-project//mlir:run_lit.sh", test_file_exts = [ "mlir", "cc", @@ -24,7 +24,7 @@ filegroup( ":importer_test_min_max", 
"//tensorflow/compiler/mlir/lite:flatbuffer_to_string", "//tensorflow/compiler/mlir/lite:flatbuffer_translate", - "@llvm//:FileCheck", + "@llvm-project//llvm:FileCheck", ], ) @@ -51,7 +51,7 @@ tf_native_cc_binary( "//tensorflow/lite:framework", "//tensorflow/lite/schema:schema_fbs", "@com_google_absl//absl/strings", - "@llvm//:support", + "@llvm-project//llvm:support", ], ) @@ -67,6 +67,6 @@ tf_native_cc_binary( "//tensorflow/lite:framework", "//tensorflow/lite/schema:schema_fbs", "@com_google_absl//absl/strings", - "@llvm//:support", + "@llvm-project//llvm:support", ], ) diff --git a/tensorflow/compiler/mlir/lite/tests/flatbuffer2mlir/output_arrays.mlir b/tensorflow/compiler/mlir/lite/tests/flatbuffer2mlir/output_arrays.mlir index d228cc06a88..20df2f75732 100644 --- a/tensorflow/compiler/mlir/lite/tests/flatbuffer2mlir/output_arrays.mlir +++ b/tensorflow/compiler/mlir/lite/tests/flatbuffer2mlir/output_arrays.mlir @@ -11,6 +11,8 @@ func @main(tensor<4xf32>) -> tensor<4xf32> { %3 = "tfl.div"(%2, %1) {fused_activation_function = "NONE"} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> loc("div") // CHECK: %[[EXP:.*]] = "tfl.exp" %4 = "tfl.exp"(%3) : (tensor<4xf32>) -> tensor<4xf32> loc("exp") + // tfl.neg should not be pruned + // CHECK: %[[NEG:.*]] = "tfl.neg" %5 = "tfl.neg"(%4) : (tensor<4xf32>) -> tensor<4xf32> loc("neg") // CHECK: return %[[MUL]], %[[EXP]], %[[DIV]] return %5 : tensor<4xf32> diff --git a/tensorflow/compiler/mlir/lite/tests/flatbuffer2mlir/pruning.mlir b/tensorflow/compiler/mlir/lite/tests/flatbuffer2mlir/pruning.mlir new file mode 100644 index 00000000000..0d7f911f282 --- /dev/null +++ b/tensorflow/compiler/mlir/lite/tests/flatbuffer2mlir/pruning.mlir @@ -0,0 +1,19 @@ +// RUN: flatbuffer_translate -mlir-to-tflite-flatbuffer %s -o - | flatbuffer_translate -output-arrays=mul,exp,div --experimental-prune-unreachable-nodes-unconditionally --tflite-flatbuffer-to-mlir - -o - | FileCheck --dump-input-on-failure %s +// Confirm graph pruning. 
+ +func @main(tensor<4xf32>) -> tensor<4xf32> { +^bb0(%arg0: tensor<4xf32>): + %0 = "tfl.pseudo_const" () {value = dense<1.0> : tensor<4xf32>} : () -> tensor<4xf32> loc("Const") + %1 = "tfl.squared_difference"(%arg0, %0) {fused_activation_function = "NONE"} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> loc("squared_difference") + // CHECK: %[[MUL:.*]] = tfl.mul + %2 = "tfl.mul"(%0, %1) {fused_activation_function = "NONE"} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> loc("mul") + // CHECK: %[[DIV:.*]] = tfl.div + %3 = "tfl.div"(%2, %1) {fused_activation_function = "NONE"} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> loc("div") + // CHECK: %[[EXP:.*]] = "tfl.exp" + %4 = "tfl.exp"(%3) : (tensor<4xf32>) -> tensor<4xf32> loc("exp") + // tfl.neg should be pruned + // CHECK-NOT: "tfl.neg" + %5 = "tfl.neg"(%4) : (tensor<4xf32>) -> tensor<4xf32> loc("neg") + // CHECK: return %[[MUL]], %[[EXP]], %[[DIV]] + return %5 : tensor<4xf32> +} diff --git a/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir b/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir index e22198da6ea..e7efc7de99b 100644 --- a/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir +++ b/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir @@ -521,21 +521,30 @@ func @select_multidim(%arg0: tensor<8xi1>, %arg1: tensor<8x3xf32>, %arg2: tensor // CHECK: return } -func @select_v2(%arg0: tensor<8xi1>, %arg1: tensor<8xf32>, %arg2: tensor<8xf32>) -> tensor<8xf32> { +func @select_v2_same_shape(%arg0: tensor<8xi1>, %arg1: tensor<8xf32>, %arg2: tensor<8xf32>) -> tensor<8xf32> { %0 = "tf.SelectV2"(%arg0, %arg1, %arg2) : (tensor<8xi1>, tensor<8xf32>, tensor<8xf32>) -> tensor<8xf32> return %0: tensor<8xf32> -// CHECK-LABEL: select_v2 +// CHECK-LABEL: select_v2_same_shape // CHECK: "tfl.select"(%arg0, %arg1, %arg2) // CHECK: return } -func @select_v2_multidim(%arg0: tensor<8xi1>, %arg1: tensor<8x3xf32>, %arg2: tensor<8x3xf32>) -> tensor<8x3xf32> { - %0 = "tf.SelectV2"(%arg0, %arg1, %arg2) : (tensor<8xi1>, tensor<8x3xf32>, tensor<8x3xf32>) -> tensor<8x3xf32> +func @select_v2_multidim(%arg0: tensor<3xi1>, %arg1: tensor<8x3xf32>, %arg2: tensor<8x3xf32>) -> tensor<8x3xf32> { + %0 = "tf.SelectV2"(%arg0, %arg1, %arg2) : (tensor<3xi1>, tensor<8x3xf32>, tensor<8x3xf32>) -> tensor<8x3xf32> return %0: tensor<8x3xf32> // CHECK-LABEL: select_v2_multidim -// CHECK: "tfl.select"(%arg0, %arg1, %arg2) +// CHECK: "tfl.select_v2"(%arg0, %arg1, %arg2) +// CHECK: return +} + +func @select_v2_broadcast(%arg0: tensor<4xi1>, %arg1: tensor<3x4xf32>, %arg2: tensor<8x3x4xf32>) -> tensor<8x3x4xf32> { + %0 = "tf.SelectV2"(%arg0, %arg1, %arg2) : (tensor<4xi1>, tensor<3x4xf32>, tensor<8x3x4xf32>) -> tensor<8x3x4xf32> + return %0: tensor<8x3x4xf32> + +// CHECK-LABEL: select_v2_broadcast +// CHECK: "tfl.select_v2"(%arg0, %arg1, %arg2) // CHECK: return } diff --git a/tensorflow/compiler/mlir/lite/tests/mlir2flatbuffer/BUILD b/tensorflow/compiler/mlir/lite/tests/mlir2flatbuffer/BUILD index c13df3faafc..c0ae9570225 100644 --- a/tensorflow/compiler/mlir/lite/tests/mlir2flatbuffer/BUILD +++ b/tensorflow/compiler/mlir/lite/tests/mlir2flatbuffer/BUILD @@ -4,7 +4,7 @@ licenses(["notice"]) glob_lit_tests( data = [":test_utilities"], - driver = "@local_config_mlir//:run_lit.sh", + driver = "@llvm-project//mlir:run_lit.sh", test_file_exts = ["mlir"], ) @@ -15,7 +15,7 @@ filegroup( data = [ "//tensorflow/compiler/mlir/lite:flatbuffer_to_string", "//tensorflow/compiler/mlir/lite:flatbuffer_translate", - "@llvm//:FileCheck", - "@llvm//:not", + "@llvm-project//llvm:FileCheck", + 
"@llvm-project//llvm:not", ], ) diff --git a/tensorflow/compiler/mlir/lite/tests/mlir2flatbuffer/type_attr.mlir b/tensorflow/compiler/mlir/lite/tests/mlir2flatbuffer/type_attr.mlir new file mode 100644 index 00000000000..01410d370d4 --- /dev/null +++ b/tensorflow/compiler/mlir/lite/tests/mlir2flatbuffer/type_attr.mlir @@ -0,0 +1,40 @@ +// RUN: flatbuffer_translate -mlir-to-tflite-flatbuffer %s -emit-custom-ops -emit-builtin-tflite-ops=false -o - | flatbuffer_to_string - | FileCheck %s + +// CHECK: { +// CHECK: version: 3, +// CHECK: operator_codes: [ { +// CHECK: builtin_code: CUSTOM, +// CHECK: custom_code: "SomeOperation" +// CHECK: } ], +// CHECK: subgraphs: [ { +// CHECK: tensors: [ { +// CHECK: shape: [ ], +// CHECK: type: INT32, +// CHECK: buffer: 1, +// CHECK: name: "tf.SomeOperation", +// CHECK: quantization: { +// CHECK-EMPTY +// CHECK: } +// CHECK: } ], +// CHECK: inputs: [ ], +// CHECK: outputs: [ 0 ], +// CHECK: operators: [ { +// CHECK: inputs: [ ], +// CHECK: outputs: [ 0 ], +// CHECK: custom_options: [ 100, 116, 121, 112, 101, 0, 1, 7, 1, 1, 1, 2, 4, 2, 36, 1 ] +// CHECK: } ], +// CHECK: name: "main" +// CHECK: } ], +// CHECK: description: "MLIR Converted.", +// CHECK: buffers: [ { +// CHECK-EMPTY +// CHECK: }, { +// CHECK-EMPTY +// CHECK: } ] +// CHECK: } + +func @main() -> tensor<*xi32> { + // Tests that the below type attribute is convertible into the corresponding custom option in flatbuffer. + %0 = "tf.SomeOperation"() {dtype = i32 } : () -> tensor<*xi32> + return %0 : tensor<*xi32> +} diff --git a/tensorflow/compiler/mlir/lite/tests/ops.mlir b/tensorflow/compiler/mlir/lite/tests/ops.mlir index ad3b5540dd7..a60796d1580 100644 --- a/tensorflow/compiler/mlir/lite/tests/ops.mlir +++ b/tensorflow/compiler/mlir/lite/tests/ops.mlir @@ -518,6 +518,20 @@ func @testMaxPool2DWrongOperandStorageType(tensor<1x7x7x16x!quant.uniform) -> (tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>) { + %0, %1 = "tfl.max_pooling_with_argmax_2d"(%arg0) {filter_h = 2 : i32, filter_w = 2 : i32, padding = "SAME", stride_h = 2 : i32, stride_w = 2 : i32} : (tensor<1x64x64x32xf32>) -> (tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>) + return %0, %1 : tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32> +} + +// ----- + +func @testMaxUnpooling2D(%arg0: tensor<1x8x8x128xf32>, %arg1: tensor<1x8x8x128xf32>) -> tensor<1x8x8x128xf32> { + %0 = "tfl.max_unpooling_2d"(%arg0, %arg1) {filter_h = 2 : i32, filter_w = 2 : i32, padding = "SAME", stride_h = 2 : i32, stride_w = 2 : i32} : (tensor<1x8x8x128xf32>, tensor<1x8x8x128xf32>) -> (tensor<1x8x8x128xf32>) + return %0 : tensor<1x8x8x128xf32> +} + +// ----- + // CHECK-LABEL: testLogistic func @testLogistic(tensor<1x2x3x4x5xbf16>) -> tensor<1x2x3x4x5xbf16> { ^bb0(%arg0: tensor<1x2x3x4x5xbf16>): @@ -1942,6 +1956,13 @@ func @testTransposeConv(%arg0: tensor<4xi32>, %arg1: tensor<32x4x4x128xf32>, %ar // ----- +func @testConvolution2DTransposeBias(%arg0: tensor<32x4x4x128xf32>, %arg1: tensor<1x32x42x128xf32>, %arg2: tensor<4xi32>) -> tensor<1x64x84x32xf32> { + %0 = "tfl.convolution_2d_transpose_bias"(%arg0, %arg1, %arg2) {padding = "SAME", stride_h = 2 : i32, stride_w = 2 : i32} : (tensor<32x4x4x128xf32>, tensor<1x32x42x128xf32>, tensor<4xi32>) -> tensor<1x64x84x32xf32> + return %0 : tensor<1x64x84x32xf32> +} + +// ----- + func @testTransposeConvBadOutputRank(%arg0: tensor<4xi32>, %arg1: tensor<32x4x4x128xf32>, %arg2: tensor<1x32x42x128xf32>) -> tensor<64x84x32xf32> { // expected-error @+1 {{expect output type has rank = 4, got output type tensor<64x84x32xf32>}} %0 = 
"tfl.transpose_conv"(%arg0, %arg1, %arg2) {padding = "SAME", stride_h = 2 : i32, stride_w = 2 : i32} : (tensor<4xi32>, tensor<32x4x4x128xf32>, tensor<1x32x42x128xf32>) -> tensor<64x84x32xf32> diff --git a/tensorflow/compiler/mlir/lite/tests/optimize.mlir b/tensorflow/compiler/mlir/lite/tests/optimize.mlir index 1d51adb16f2..5a07946fd9e 100644 --- a/tensorflow/compiler/mlir/lite/tests/optimize.mlir +++ b/tensorflow/compiler/mlir/lite/tests/optimize.mlir @@ -140,22 +140,6 @@ func @fuseAddWithRelu6IntoDepthwiseConv2d(%arg0: tensor<256x32x32x3xf32>, %arg1: // CHECK-SAME: fused_activation_function = "RELU6" } -// CHECK-LABEL: intermOpUsedTwice -func @intermOpUsedTwice(%arg0: tensor<256x32x32x3xf32>, %arg1: tensor<16x3x3x3xf32>) -> (tensor<256x30x30x16xf32>, tensor<256x30x30x16xf32>) { - %cst = constant dense<1.5> : tensor<16xf32> - %cst_0 = constant dense<[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0]> : tensor<16xf32> - %0 = "tfl.conv_2d"(%arg0, %arg1, %cst_0) {dilation_h_factor = 2 : i32, dilation_w_factor = 3 : i32, fused_activation_function = "NONE", padding = "SAME", stride_h = 4 : i32, stride_w = 5 : i32} : (tensor<256x32x32x3xf32>, tensor<16x3x3x3xf32>, tensor<16xf32>) -> tensor<256x30x30x16xf32> - %1 = "tfl.add"(%0, %cst) {fused_activation_function = "RELU6"} : (tensor<256x30x30x16xf32>, tensor<16xf32>) -> tensor<256x30x30x16xf32> - return %0, %1 : tensor<256x30x30x16xf32>, tensor<256x30x30x16xf32> - - // CHECK: %cst = constant dense<[1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00, - // CHECK: %cst_0 = constant dense<[2.500000e+00, 3.500000e+00, 4.500000e+00, 5.500000e+00, - // CHECK: %0 = "tfl.conv_2d"(%arg0, %arg1, %cst) {dilation_h_factor = 2 : i32, dilation_w_factor = 3 : i32, fused_activation_function = "NONE", padding = "SAME", stride_h = 4 : i32, stride_w = 5 : i32} - // CHECK: %1 = "tfl.conv_2d"(%arg0, %arg1, %cst_0) {dilation_h_factor = 2 : i32, dilation_w_factor = 3 : i32, fused_activation_function = "RELU6", padding = "SAME", stride_h = 4 : i32, stride_w = 5 : i32} - // CHECK: return %0, %1 - -} - // CHECK-LABEL: @fuseMulIntoFullyConnected func @fuseMulIntoFullyConnected(%arg0: tensor<4x2xf32>) -> tensor<4x2xf32> { %cst0 = constant dense<[[1.0, 2.0], [3.0, 4.0]]> : tensor<2x2xf32> @@ -167,8 +151,8 @@ func @fuseMulIntoFullyConnected(%arg0: tensor<4x2xf32>) -> tensor<4x2xf32> { return %1 : tensor<4x2xf32> -// CHECK: %[[CONSTANT:.*]] = "tf.Const"{{.*}} dense<{{\[\[}}1.000000e+00, 4.000000e+00], [3.000000e+00, 8.000000e+00]]> : tensor<2x2xf32> -// CHECK: %[[CONSTANT0:.*]] = "tf.Const"{{.*}} dense<[2.000000e+00, 4.000000e+00]> : tensor<2xf32> +// CHECK: %[[CONSTANT:.*]] = constant dense<{{\[\[}}1.000000e+00, 4.000000e+00], [3.000000e+00, 8.000000e+00]]> : tensor<2x2xf32> +// CHECK: %[[CONSTANT0:.*]] = constant dense<[2.000000e+00, 4.000000e+00]> : tensor<2xf32> // CHECK: %[[RES:.*]] = "tfl.fully_connected"(%arg0, %[[CONSTANT]], %[[CONSTANT0]]) {fused_activation_function = "RELU6", keep_num_dims = false, weights_format = "DEFAULT"} // CHECK: return %[[RES]] : tensor<4x2xf32> } @@ -233,8 +217,8 @@ func @fuseMulIntoFullyConnectedBroadcast(%arg0: tensor<1x3xf32>) -> tensor<1x2xf %1 = "tfl.mul"(%0, %cst2) {fused_activation_function = "RELU6"} : (tensor<1x2xf32>, tensor<2xf32>) -> tensor<1x2xf32> return %1 : tensor<1x2xf32> -// CHECK: %[[CONSTANT:.*]] = "tf.Const"{{.*}} dense<{{\[\[}}1.000000e+00, 2.000000e+00, 3.000000e+00], [2.000000e+00, 4.000000e+00, 6.000000e+00]]> : tensor<2x3xf32> -// CHECK: %[[CONSTANT0:.*]] = "tf.Const"{{.*}} 
dense<[2.000000e+00, 4.000000e+00]> : tensor<2xf32> +// CHECK: %[[CONSTANT:.*]] = constant dense<{{\[\[}}1.000000e+00, 2.000000e+00, 3.000000e+00], [2.000000e+00, 4.000000e+00, 6.000000e+00]]> : tensor<2x3xf32> +// CHECK: %[[CONSTANT0:.*]] = constant dense<[2.000000e+00, 4.000000e+00]> : tensor<2xf32> // CHECK: %[[RES:.*]] = "tfl.fully_connected"(%arg0, %[[CONSTANT]], %[[CONSTANT0]]) {fused_activation_function = "RELU6", keep_num_dims = false, weights_format = "DEFAULT"} // CHECK: return %[[RES]] : tensor<1x2xf32> } @@ -249,7 +233,7 @@ func @fuseMulIntoFullyConnectedNoBias(%arg0: tensor<4x2xf32>, %arg1: none) -> te return %1 : tensor<4x2xf32> -// CHECK: %[[CONSTANT:.*]] = "tf.Const"{{.*}} dense<{{\[\[}}1.000000e+00, 4.000000e+00], [3.000000e+00, 8.000000e+00]]> : tensor<2x2xf32> +// CHECK: %[[CONSTANT:.*]] = constant dense<{{\[\[}}1.000000e+00, 4.000000e+00], [3.000000e+00, 8.000000e+00]]> : tensor<2x2xf32> // CHECK: %[[RES:.*]] = "tfl.fully_connected"(%arg0, %[[CONSTANT]], %arg1) {fused_activation_function = "RELU6", keep_num_dims = false, weights_format = "DEFAULT"} : (tensor<4x2xf32>, tensor<2x2xf32>, none) -> tensor<4x2xf32> // CHECK: return %[[RES]] : tensor<4x2xf32> } @@ -631,3 +615,18 @@ func @fuse_relu_to_add(%arg0: tensor<2x3xf32>, %arg1: tensor<2x3xf32>) -> tensor // CHECK: %[[RES:.*]] = tfl.add %arg0, %arg1 {fused_activation_function = "RELU_N1_TO_1"} // CHECK: return %[[RES]] } + +// CHECK-LABEL: NotfuseAddIntoConv2d_MultipleUsers +func @NotfuseAddIntoConv2d_MultipleUsers(%arg0: tensor<256x32x32x3xf32>, %arg1: tensor<16x3x3x3xf32>) -> (tensor<256x30x30x16xf32>, tensor<256x30x30x16xf32>) { + %cst = constant dense<1.5> : tensor<16xf32> + %cst_1 = constant dense<3.5> : tensor<16xf32> + %cst_0 = constant dense<[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0]> : tensor<16xf32> + %0 = "tfl.conv_2d"(%arg0, %arg1, %cst_0) {dilation_h_factor = 2 : i32, dilation_w_factor = 3 : i32, fused_activation_function = "NONE", padding = "SAME", stride_h = 4 : i32, stride_w = 5 : i32} : (tensor<256x32x32x3xf32>, tensor<16x3x3x3xf32>, tensor<16xf32>) -> tensor<256x30x30x16xf32> + %1 = "tfl.add"(%0, %cst) {fused_activation_function = "NONE"} : (tensor<256x30x30x16xf32>, tensor<16xf32>) -> tensor<256x30x30x16xf32> + %2 = "tfl.add"(%0, %cst_1) {fused_activation_function = "NONE"} : (tensor<256x30x30x16xf32>, tensor<16xf32>) -> tensor<256x30x30x16xf32> + return %1, %2 : tensor<256x30x30x16xf32>, tensor<256x30x30x16xf32> + + // CHECK: %[[tfl_conv2d:[0-9].*]] = "tfl.conv_2d" + // CHECK: tfl.add + // CHECK-NEXT: tfl.add +} diff --git a/tensorflow/compiler/mlir/lite/tests/prepare-quantize-signed.mlir b/tensorflow/compiler/mlir/lite/tests/prepare-quantize-signed.mlir index 29585296217..f6054f3d65d 100644 --- a/tensorflow/compiler/mlir/lite/tests/prepare-quantize-signed.mlir +++ b/tensorflow/compiler/mlir/lite/tests/prepare-quantize-signed.mlir @@ -125,3 +125,21 @@ func @prepareDepthwiseConv2D(%arg0: tensor<1x224x224x3xf32>) -> tensor<1x112x112 // PerTensor: %[[dq:.*]] = "tfl.dequantize"(%[[q]]) // PerTensor: %[[conv:.*]] = "tfl.depthwise_conv_2d"(%arg0, %[[dq]] } + +// CHECK-LABEL: QuantizeFullyConnected +func @QuantizeFullyConnected(%arg0: tensor<1x224x224x3xf32>) -> tensor<1x112x112x32xf32> { + %w = constant dense<127.0> : tensor<32x12xf32> + %b = constant dense<0.0> : tensor<32xf32> + %fc = "tfl.fully_connected"(%arg0, %w, %b) {fused_activation_function = "NONE", keep_num_dims = false, weights_format = "DEFAULT"} : (tensor<1x224x224x3xf32>, tensor<32x12xf32>, 
tensor<32xf32>) -> tensor<1x112x112x32xf32> + return %fc : tensor<1x112x112x32xf32> + +// CHECK: %[[cst:.*]] = constant dense<1.270000e+02> : tensor<32x12xf32> +// CHECK: %[[q:.*]] = "tfl.quantize"(%cst) {qtype = tensor<32x12x!quant.uniform:f32, 1.000000e+00>>} : (tensor<32x12xf32>) -> tensor<32x12x!quant.uniform:f32, 1.000000e+00>> +// CHECK: %[[dq:.*]] = "tfl.dequantize"(%0) : (tensor<32x12x!quant.uniform:f32, 1.000000e+00>>) -> tensor<32x12xf32> +// CHECK: "tfl.fully_connected"(%arg0, %[[dq]] + +// PerTensor: %[[cst:.*]] = constant dense<1.270000e+02> : tensor<32x12xf32> +// PerTensor: %[[q:.*]] = "tfl.quantize"(%cst) {qtype = tensor<32x12x!quant.uniform:f32, 1.000000e+00>>} : (tensor<32x12xf32>) -> tensor<32x12x!quant.uniform:f32, 1.000000e+00>> +// PerTensor: %[[dq:.*]] = "tfl.dequantize"(%0) : (tensor<32x12x!quant.uniform:f32, 1.000000e+00>>) -> tensor<32x12xf32> +// PerTensor: "tfl.fully_connected"(%arg0, %[[dq]] +} diff --git a/tensorflow/compiler/mlir/lite/tests/prepare-quantize.mlir b/tensorflow/compiler/mlir/lite/tests/prepare-quantize.mlir index cd111176163..fc9c55089a3 100644 --- a/tensorflow/compiler/mlir/lite/tests/prepare-quantize.mlir +++ b/tensorflow/compiler/mlir/lite/tests/prepare-quantize.mlir @@ -379,26 +379,26 @@ func @QuantizeConcatResToAllNoRequantize(tensor<1x2x!quant.uniform>) -> tensor<1x2xf32> // CHECK: %3 = "tfl.concatenation"(%2, %1) {axis = 0 : i32, fused_activation_function = "NONE"} : (tensor<1x2xf32>, tensor<1x2xf32>) -> tensor<2x2xf32> // CHECK: %4 = "tfl.quantize"(%3) {qtype = tensor<2x2x!quant.uniform>} : (tensor<2x2xf32>) -> tensor<2x2x!quant.uniform> -// CHeCK: return %4 : tensor<2x2x!quant.uniform> +// CHECK: return %4 : tensor<2x2x!quant.uniform> } // CHECK-LABEL: QuantizeConcatResToAllRequantize func @QuantizeConcatResToAllRequantize(tensor<1x2xf32>, tensor<1x2xf32>) -> tensor<2x2x!quant.uniform> { ^bb0(%arg0: tensor<1x2xf32>, %arg1: tensor<1x2xf32>): - %0 = "tfl.quantize"(%arg0) {qtype = tensor<2x!quant.uniform>} : (tensor<1x2xf32>) -> tensor<1x2x!quant.uniform> + %0 = "tfl.quantize"(%arg0) {qtype = tensor<1x2x!quant.uniform>} : (tensor<1x2xf32>) -> tensor<1x2x!quant.uniform> %1 = "tfl.dequantize"(%0) : (tensor<1x2x!quant.uniform>) -> tensor<1x2xf32> %2 = "tfl.concatenation"(%1, %arg1) {axis = 0 : i32, fused_activation_function = "NONE"} : (tensor<1x2xf32>, tensor<1x2xf32>) -> tensor<2x2xf32> %3 = "tfl.quantize"(%2) {qtype = tensor<2x2x!quant.uniform>} : (tensor<2x2xf32>) -> tensor<2x2x!quant.uniform> return %3 : tensor<2x2x!quant.uniform> -// CHECK %0 = "tfl.quantize"(%arg0) {qtype = tensor<2x!quant.uniform>} : (tensor<2xf32>) -> tensor<2x!quant.uniform> -// CHECK %1 = "tfl.quantize"(%0) {qtype = tensor<2x!quant.uniform>} : (tensor<2x!quant.uniform>) -> tensor<2x!quant.uniform> -// CHECK %2 = "tfl.dequantize"(%1) : (tensor<2x!quant.uniform>) -> tensor<2xf32> -// CHECK %3 = "tfl.quantize"(%arg1) {qtype = tensor<2x!quant.uniform>} : (tensor<2xf32>) -> tensor<2x!quant.uniform> -// CHECK %4 = "tfl.dequantize"(%3) : (tensor<2x!quant.uniform>) -> tensor<2xf32> -// CHECK %5 = "tfl.concatenation"(%2, %4) {axis = 0 : i32, fused_activation_function = "NONE"} : (tensor<2xf32>, tensor<2xf32>) -> tensor<2x2xf32> -// CHECK %6 = "tfl.quantize"(%5) {qtype = tensor<2x2x!quant.uniform>} : (tensor<2x2xf32>) -> tensor<2x2x!quant.uniform> -// CHECK return %6 : tensor<2x2x!quant.uniform> +// CHECK: %[[Q1:.*]] = "tfl.quantize"(%arg1) {qtype = tensor<1x2x!quant.uniform>} : (tensor<1x2xf32>) -> tensor<1x2x!quant.uniform> +// CHECK: %[[DQ1:.*]] = 
"tfl.dequantize"(%[[Q1]]) : (tensor<1x2x!quant.uniform>) -> tensor<1x2xf32> +// CHECK: %[[Q0:.*]] = "tfl.quantize"(%arg0) {qtype = tensor<1x2x!quant.uniform>} : (tensor<1x2xf32>) -> tensor<1x2x!quant.uniform> +// CHECK: %[[RQ0:.*]] = "tfl.quantize"(%[[Q0]]) {qtype = tensor<1x2x!quant.uniform>} : (tensor<1x2x!quant.uniform>) -> tensor<1x2x!quant.uniform> +// CHECK: %[[DQ0:.*]] = "tfl.dequantize"(%[[RQ0]]) : (tensor<1x2x!quant.uniform>) -> tensor<1x2xf32> +// CHECK: %[[CONC:.*]] = "tfl.concatenation"(%[[DQ0]], %[[DQ1]]) {axis = 0 : i32, fused_activation_function = "NONE"} : (tensor<1x2xf32>, tensor<1x2xf32>) -> tensor<2x2xf32> +// CHECK: %[[Q:.*]] = "tfl.quantize"(%[[CONC]]) {qtype = tensor<2x2x!quant.uniform>} : (tensor<2x2xf32>) -> tensor<2x2x!quant.uniform> +// CHECK: return %[[Q]] : tensor<2x2x!quant.uniform> } // CHECK-LABEL: QuantizeConcatResToAllRequantizeArg @@ -409,13 +409,13 @@ func @QuantizeConcatResToAllRequantizeArg(tensor<1x2x!quant.uniform>} : (tensor<2x2xf32>) -> tensor<2x2x!quant.uniform> return %3 : tensor<2x2x!quant.uniform> -// CHECK %1 = "tfl.quantize"(%arg0) {qtype = tensor<2x!quant.uniform>} : (tensor<1x2x!quant.uniform>) -> tensor<1x2x!quant.uniform> -// CHECK %2 = "tfl.dequantize"(%1) : (tensor<1x2x!quant.uniform>) -> tensor<1x2xf32> -// CHECK %3 = "tfl.quantize"(%arg1) {qtype = tensor<2x!quant.uniform>} : (tensor<1x2xf32>) -> tensor<1x2x!quant.uniform> -// CHECK %4 = "tfl.dequantize"(%3) : (tensor<1x2x!quant.uniform>) -> tensor<1x2xf32> -// CHECK %5 = "tfl.concatenation"(%2, %4) {axis = 0 : i32, fused_activation_function = "NONE"} : (tensor<1x2xf32>, tensor<1x2xf32>) -> tensor<2x2xf32> -// CHECK %6 = "tfl.quantize"(%5) {qtype = tensor<2x2x!quant.uniform>} : (tensor<2x2xf32>) -> tensor<2x2x!quant.uniform> -// CHECK return %6 : tensor<2x2x!quant.uniform> +// CHECK: %[[Q1:.*]] = "tfl.quantize"(%arg1) {qtype = tensor<1x2x!quant.uniform>} : (tensor<1x2xf32>) -> tensor<1x2x!quant.uniform> +// CHECK: %[[DQ1:.*]] = "tfl.dequantize"(%[[Q1]]) : (tensor<1x2x!quant.uniform>) -> tensor<1x2xf32> +// CHECK: %[[RQ0:.*]] = "tfl.quantize"(%arg0) {qtype = tensor<1x2x!quant.uniform>} : (tensor<1x2x!quant.uniform>) -> tensor<1x2x!quant.uniform> +// CHECK: %[[DQ0:.*]] = "tfl.dequantize"(%[[RQ0]]) : (tensor<1x2x!quant.uniform>) -> tensor<1x2xf32> +// CHECK: %[[CONC:.*]] = "tfl.concatenation"(%[[DQ0]], %[[DQ1]]) {axis = 0 : i32, fused_activation_function = "NONE"} : (tensor<1x2xf32>, tensor<1x2xf32>) -> tensor<2x2xf32> +// CHECK: %[[Q:.*]] = "tfl.quantize"(%[[CONC]]) {qtype = tensor<2x2x!quant.uniform>} : (tensor<2x2xf32>) -> tensor<2x2x!quant.uniform> +// CHECK: return %[[Q]] : tensor<2x2x!quant.uniform> } // CHECK-LABEL: RequantizeAlreadyQuantizedModel diff --git a/tensorflow/compiler/mlir/lite/tests/quantize.mlir b/tensorflow/compiler/mlir/lite/tests/quantize.mlir index 225123eb3d3..89d1e7cb7f4 100644 --- a/tensorflow/compiler/mlir/lite/tests/quantize.mlir +++ b/tensorflow/compiler/mlir/lite/tests/quantize.mlir @@ -204,8 +204,9 @@ func @QuantizeConcatRequantize(tensor<1x2x!quant.uniform>, tens %3 = "tfl.quantize"(%2) {qtype = tensor<2x2x!quant.uniform>} : (tensor<2x2xf32>) -> tensor<2x2x!quant.uniform> return %3 : tensor<2x2x!quant.uniform> -// CHECK: %[[q:.*]] = "tfl.quantize"(%arg1) {qtype = tensor<1x2x!quant.uniform>} -// CHECK: %[[cc:.*]] = "tfl.concatenation"(%arg0, %[[q]]) {axis = 0 : i32, fused_activation_function = "NONE"} +// CHECK: %[[q1:.*]] = "tfl.quantize"(%arg1) {qtype = tensor<1x2x!quant.uniform>} +// CHECK: %[[q0:.*]] = "tfl.quantize"(%arg0) {qtype = 
tensor<1x2x!quant.uniform>} : (tensor<1x2x!quant.uniform>) -> tensor<1x2x!quant.uniform> +// CHECK: %[[cc:.*]] = "tfl.concatenation"(%[[q0]], %[[q1]]) {axis = 0 : i32, fused_activation_function = "NONE"} // CHECK: return %[[cc]] : tensor<2x2x!quant.uniform> } diff --git a/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc b/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc index 58ff9ce9d2e..e2cf3f9012a 100644 --- a/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc +++ b/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc @@ -15,11 +15,11 @@ limitations under the License. #include "tensorflow/compiler/mlir/lite/tf_tfl_passes.h" -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassManager.h" // TF:local_config_mlir -#include "mlir/Transforms/Passes.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassManager.h" // TF:llvm-project +#include "mlir/Transforms/Passes.h" // TF:llvm-project #include "tensorflow/compiler/mlir/lite/quantization/quantization_config.h" #include "tensorflow/compiler/mlir/lite/quantization/quantization_passes.h" #include "tensorflow/compiler/mlir/lite/transforms/passes.h" diff --git a/tensorflow/compiler/mlir/lite/tf_tfl_passes.h b/tensorflow/compiler/mlir/lite/tf_tfl_passes.h index 7d5b28356dd..651248b1059 100644 --- a/tensorflow/compiler/mlir/lite/tf_tfl_passes.h +++ b/tensorflow/compiler/mlir/lite/tf_tfl_passes.h @@ -16,8 +16,8 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_MLIR_LITE_TF_TFL_PASSES_H_ #define TENSORFLOW_COMPILER_MLIR_LITE_TF_TFL_PASSES_H_ -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/Pass/PassManager.h" // TF:local_config_mlir +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/Pass/PassManager.h" // TF:llvm-project #include "tensorflow/compiler/mlir/lite/common/tfl_pass_config.h" namespace tensorflow { diff --git a/tensorflow/compiler/mlir/lite/tf_tfl_translate.cc b/tensorflow/compiler/mlir/lite/tf_tfl_translate.cc index aa7e4f21c74..648f469e9b0 100644 --- a/tensorflow/compiler/mlir/lite/tf_tfl_translate.cc +++ b/tensorflow/compiler/mlir/lite/tf_tfl_translate.cc @@ -20,11 +20,11 @@ limitations under the License. #include "llvm/Support/InitLLVM.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/ToolOutputFile.h" -#include "mlir/IR/Diagnostics.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/Support/FileUtilities.h" // TF:local_config_mlir +#include "mlir/IR/Diagnostics.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/Support/FileUtilities.h" // TF:llvm-project #include "tensorflow/compiler/mlir/init_mlir.h" #include "tensorflow/compiler/mlir/lite/common/tfl_pass_config.h" #include "tensorflow/compiler/mlir/lite/flatbuffer_translate.h" @@ -103,7 +103,7 @@ static int PrintFunctionResultMapping(const std::string &result, i = 0; for (auto output : *subgraph->outputs()) { print_buffer(*subgraph, i, output, [&](int i) { - return terminator ? terminator->getOperand(i)->getLoc() : unknown_loc; + return terminator ? 
terminator->getOperand(i).getLoc() : unknown_loc; }); } } diff --git a/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.cc b/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.cc index bab2ffff7cb..71deb4a8cb3 100644 --- a/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.cc +++ b/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.cc @@ -15,12 +15,12 @@ limitations under the License. #include "tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.h" -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/Parser.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Support/FileUtilities.h" // TF:local_config_mlir -#include "mlir/Transforms/Passes.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/Parser.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Support/FileUtilities.h" // TF:llvm-project +#include "mlir/Transforms/Passes.h" // TF:llvm-project #include "tensorflow/compiler/mlir/lite/flatbuffer_translate.h" #include "tensorflow/compiler/mlir/lite/quantization/quantization_config.h" #include "tensorflow/compiler/mlir/lite/transforms/passes.h" diff --git a/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.h b/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.h index 0f6b2f384f0..6f002af463b 100644 --- a/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.h +++ b/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.h @@ -17,9 +17,9 @@ limitations under the License. #define TENSORFLOW_COMPILER_MLIR_LITE_TF_TO_TFL_FLATBUFFER_H_ #include "llvm/Support/SourceMgr.h" -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/Pass/PassManager.h" // TF:local_config_mlir +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/Pass/PassManager.h" // TF:llvm-project #include "tensorflow/compiler/mlir/lite/common/tfl_pass_config.h" #include "tensorflow/compiler/mlir/lite/quantization/quantization_config.h" #include "tensorflow/stream_executor/lib/statusor.h" diff --git a/tensorflow/compiler/mlir/lite/transforms/extract_ophint.cc b/tensorflow/compiler/mlir/lite/transforms/extract_ophint.cc index 52eb6216e90..7aab9f08732 100644 --- a/tensorflow/compiler/mlir/lite/transforms/extract_ophint.cc +++ b/tensorflow/compiler/mlir/lite/transforms/extract_ophint.cc @@ -21,26 +21,26 @@ limitations under the License. 
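// Overview, inferred from the code below rather than stated in this file:
// ophint annotations mark the boundaries of composite ops. This pass groups
// the annotated ops into OphintCompositeOp instances, aggregates each logical
// input/output across time steps using the "first", "last", or "stack"
// strategies, and outlines the result into a fused function stub.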
#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Casting.h" -#include "mlir/Analysis/LoopAnalysis.h" // TF:local_config_mlir -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Block.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/OperationSupport.h" // TF:local_config_mlir -#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/SymbolTable.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir -#include "mlir/IR/Value.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassRegistry.h" // TF:local_config_mlir -#include "mlir/Support/Functional.h" // TF:local_config_mlir -#include "mlir/Support/LLVM.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir +#include "mlir/Analysis/LoopAnalysis.h" // TF:llvm-project +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Block.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/OperationSupport.h" // TF:llvm-project +#include "mlir/IR/PatternMatch.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/SymbolTable.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project +#include "mlir/IR/Value.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassRegistry.h" // TF:llvm-project +#include "mlir/Support/Functional.h" // TF:llvm-project +#include "mlir/Support/LLVM.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" #include "tensorflow/compiler/mlir/lite/transforms/passes.h" #include "tensorflow/compiler/mlir/lite/utils/attribute_utils.h" @@ -188,10 +188,10 @@ struct OphintCompositeOp { // This function will process the aggregated inputs based on different // strategies like "first", "last", "stack". - std::map GetAggregatedInputs(OpBuilder* builder) { - std::map aggregated_inputs; + std::map GetAggregatedInputs(OpBuilder* builder) { + std::map aggregated_inputs; for (const auto& kv : inputs) { - Value* op_input = nullptr; + Value op_input = nullptr; const AggregatedOperand& operand = kv.second; // Dealing with "stack" strategy: // This breaks into two parts: @@ -203,9 +203,9 @@ struct OphintCompositeOp { if (operand.ops.size() == 1) { // If ops size is 1, it will be simply expanding dimensions at dim 0. 
Operation* current_identity_op = operand.ops.begin()->second; - Value* input = current_identity_op->getOperand(0); + Value input = current_identity_op->getOperand(0); RankedTensorType input_type = - input->getType().cast(); + input.getType().cast(); // The Reshape will be {1, (original_shape)} SmallVector reshape_op_shape; reshape_op_shape.push_back(1); @@ -234,21 +234,21 @@ struct OphintCompositeOp { } else { // Insert a pack op to pack all the inputs together. - std::vector pack_input_operands; - std::vector packed_input_consumers; + std::vector pack_input_operands; + std::vector packed_input_consumers; for (int i = 0, e = operand.ops.size(); i < e; ++i) { pack_input_operands.push_back(operand.ops.at(i)->getOperand(0)); packed_input_consumers.push_back(operand.ops.at(i)->getResult(0)); } // Find the first op that consumes the last value of the aggregated // inputs. - Operation* first_use = *(packed_input_consumers.back()->user_begin()); + Operation* first_use = *(packed_input_consumers.back().user_begin()); // The pack reshape will be {N, (original_shape)} SmallVector pack_shape; pack_shape.push_back(pack_input_operands.size()); RankedTensorType type = operand.ops.at(0) ->getResult(0) - ->getType() + .getType() .cast(); for (const auto& dim : type.getShape()) { pack_shape.push_back(dim); @@ -288,9 +288,9 @@ struct OphintCompositeOp { const AggregatedOperand& operand = kv.second; if (operand.aggregation == kStrategyStack) { const int output_numer = operand.ops.size(); - Value* first_output = operand.ops.at(0)->getOperand(0); + Value first_output = operand.ops.at(0)->getOperand(0); RankedTensorType first_output_type = - first_output->getType().cast(); + first_output.getType().cast(); // The aggregated output shape will be {N, original_shape}. SmallVector shape; shape.push_back(output_numer); @@ -300,12 +300,12 @@ struct OphintCompositeOp { aggregated_output_types[kv.first] = RankedTensorType::get(shape, first_output_type.getElementType()); } else if (operand.aggregation == kStrategyLast) { - Value* last_output = + Value last_output = operand.ops.at(operand.ops.size() - 1)->getOperand(0); - aggregated_output_types[kv.first] = last_output->getType(); + aggregated_output_types[kv.first] = last_output.getType(); } else { - Value* first_output = operand.ops.at(0)->getOperand(0); - aggregated_output_types[kv.first] = first_output->getType(); + Value first_output = operand.ops.at(0)->getOperand(0); + aggregated_output_types[kv.first] = first_output.getType(); } } return aggregated_output_types; @@ -329,7 +329,7 @@ struct OphintCompositeOp { Operation* first_output = operand.ops.at(0); Location insert_loc = first_output->getLoc(); SmallVector unpack_output_types( - output_number, first_output->getOperand(0)->getType()); + output_number, first_output->getOperand(0).getType()); builder->setInsertionPoint(first_output); Operation* unpack_op = builder->create( @@ -404,7 +404,7 @@ void PreprocessTopoSortGraph( // should only count as one. 
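// For example, if an op consumes operands {a, a, b}, where both uses of `a`
// are defined by op A and `b` by op B, input_ops becomes {A, B}, so A is
// counted only once.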
llvm::DenseSet input_ops; for (int i = 0; i < op.getNumOperands(); ++i) { - Operation* input_op = op.getOperand(i)->getDefiningOp(); + Operation* input_op = op.getOperand(i).getDefiningOp(); if (input_op) input_ops.insert(input_op); } if (input_ops.empty()) { @@ -507,15 +507,15 @@ LogicalResult TopoSortOperations(OpBuilder* builder) { Operation* BuildFusedFuncOp(StringRef func_name, StringRef fused_func_type, Operation* insert_before_op, - const std::map& inputs, + const std::map& inputs, const std::map& output_types, OpBuilder* builder, ModuleOp* module_op) { SmallVector input_types; - SmallVector input_values; + SmallVector input_values; SmallVector input_indexes; for (const auto& kv : inputs) { - Value* input = kv.second; - input_types.push_back(input->getType()); + Value input = kv.second; + input_types.push_back(input.getType()); input_values.push_back(input); input_indexes.push_back(kv.first); } @@ -588,8 +588,8 @@ llvm::DenseSet BfsForReachableOps(ArrayRef input_ops) { llvm::DenseSet reachable_ops; std::queue ops_queue; for (auto& input_op : input_ops) { - for (Value* value : input_op->getOperands()) { - Operation* op = value->getDefiningOp(); + for (Value value : input_op->getOperands()) { + Operation* op = value.getDefiningOp(); if (op != nullptr) ops_queue.push(op); } } @@ -598,8 +598,8 @@ llvm::DenseSet BfsForReachableOps(ArrayRef input_ops) { Operation* current_op = ops_queue.front(); ops_queue.pop(); reachable_ops.insert(current_op); - for (Value* value : current_op->getOperands()) { - Operation* upstream_op = value->getDefiningOp(); + for (Value value : current_op->getOperands()) { + Operation* upstream_op = value.getDefiningOp(); // Not visited, put it into the queue. if (upstream_op != nullptr && !llvm::is_contained(reachable_ops, upstream_op)) { @@ -625,7 +625,7 @@ LogicalResult ConvertOphintToStub(StringRef stub_name, BfsForReachableOps(ophint_composite_op.GetAllOutputOps()); // Step 3, deal with inputs aggregation strategies. - const std::map& aggregated_inputs = + const std::map& aggregated_inputs = ophint_composite_op.GetAggregatedInputs(builder); // Step 4, get aggregated output types. @@ -642,7 +642,7 @@ LogicalResult ConvertOphintToStub(StringRef stub_name, aggregated_inputs, aggregated_output_types, builder, module_op); for (const auto& kv : aggregated_inputs) { - Operation* op = kv.second->getDefiningOp(); + Operation* op = kv.second.getDefiningOp(); if (op == nullptr) return failure(); op->moveBefore(fused_op); } diff --git a/tensorflow/compiler/mlir/lite/transforms/legalize_ophint_func_op.cc b/tensorflow/compiler/mlir/lite/transforms/legalize_ophint_func_op.cc index ed3a9ea5000..e31b143ab43 100644 --- a/tensorflow/compiler/mlir/lite/transforms/legalize_ophint_func_op.cc +++ b/tensorflow/compiler/mlir/lite/transforms/legalize_ophint_func_op.cc @@ -15,23 +15,23 @@ limitations under the License. 
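// Overview, inferred from the builders below: this pass matches calls to
// ophint-generated composite functions and rewrites them into the fused
// TFL UnidirectionalSequenceRnn / UnidirectionalSequenceLSTM ops.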
#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringMap.h" -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Block.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/OperationSupport.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/SymbolTable.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir -#include "mlir/IR/Value.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassRegistry.h" // TF:local_config_mlir -#include "mlir/Support/LLVM.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Block.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/OperationSupport.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/SymbolTable.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project +#include "mlir/IR/Value.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassRegistry.h" // TF:llvm-project +#include "mlir/Support/LLVM.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" namespace mlir { @@ -92,18 +92,18 @@ LogicalResult BuildUnidirectionalSequenceRnnOp(FuncOp composite_func_op, if (call_op.getNumResults() != 1) return failure(); // Inputs is indexed at 0. - Value* input = call_op.getOperand(0); + Value input = call_op.getOperand(0); // Input_weight is indexed at 1. - Value* weight = call_op.getOperand(1); + Value weight = call_op.getOperand(1); // Recurrent_weight is indexed at 2. - Value* recurrent_weight = call_op.getOperand(2); + Value recurrent_weight = call_op.getOperand(2); // Bias is indexed at 3. - Value* bias = call_op.getOperand(3); + Value bias = call_op.getOperand(3); // Hidden_state is indexed at 4. - Value* hidden_state = call_op.getOperand(4); + Value hidden_state = call_op.getOperand(4); // Build Output. - auto output_type = call_op.getResult(0)->getType(); + auto output_type = call_op.getResult(0).getType(); // Currently, ophinted RNN only supports time_major = True. const bool time_major = true; @@ -127,7 +127,7 @@ LogicalResult BuildUnidirectionalSequenceLSTMOp(FuncOp composite_func_op, auto input_index_attr = composite_func_op.getAttr(kTfLiteFunctionInputIndex) .cast() .getValue(); - llvm::DenseMap fused_ops_index_to_call_op_args; + llvm::DenseMap fused_ops_index_to_call_op_args; for (int i = 0; i < call_op.getNumOperands(); ++i) { int input_index = input_index_attr[i].cast().getInt(); @@ -139,7 +139,7 @@ LogicalResult BuildUnidirectionalSequenceLSTMOp(FuncOp composite_func_op, // We encounter some optional arguments not filled, so we need to create an // empty Value. 
- Value* none_value; + Value none_value; if (call_op.getNumOperands() < kUnidirectionalSequenceLSTMOpTotalIArgumentNum) { builder->setInsertionPoint(call_op.getOperation()); @@ -148,7 +148,7 @@ LogicalResult BuildUnidirectionalSequenceLSTMOp(FuncOp composite_func_op, } // Prepare all operands for the UnidirectionalSequenceLSTMOp. - SmallVector operands; + SmallVector operands; for (int i = 0; i < kUnidirectionalSequenceLSTMOpTotalIArgumentNum; ++i) { auto operand_it = fused_ops_index_to_call_op_args.find(i); if (operand_it == fused_ops_index_to_call_op_args.end()) { @@ -169,12 +169,12 @@ LogicalResult BuildUnidirectionalSequenceLSTMOp(FuncOp composite_func_op, if (call_op.getNumResults() > 1) { for (int i = 0; i < call_op.getNumResults() - 1; ++i) { // This one should not be used. - Value* unused_output = call_op.getResult(i); - if (!unused_output->use_empty()) return failure(); + Value unused_output = call_op.getResult(i); + if (!unused_output.use_empty()) return failure(); } } output_types.push_back( - call_op.getResult(call_op.getNumResults() - 1)->getType()); + call_op.getResult(call_op.getNumResults() - 1).getType()); // Prepare attributes. SmallVector attributes; @@ -206,11 +206,11 @@ LogicalResult ConvertTfLiteFusedOpIfAvailable(StringRef func_name, LogicalResult build_fused_op_result = BuildUnidirectionalSequenceLSTMOp( composite_func_op, call_op, builder, &fused_op); if (failed(build_fused_op_result)) return build_fused_op_result; - Value* call_output = call_op.getResult(call_op.getNumResults() - 1); - if (call_output->getType() != fused_op->getResult(0)->getType()) { + Value call_output = call_op.getResult(call_op.getNumResults() - 1); + if (call_output.getType() != fused_op->getResult(0).getType()) { return failure(); } - call_output->replaceAllUsesWith(fused_op->getResult(0)); + call_output.replaceAllUsesWith(fused_op->getResult(0)); } else { // If we support more fused op, we should add the conversion here. return failure(); } diff --git a/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td b/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td index 9e9dfa5874f..596809d3bcb 100644 --- a/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td +++ b/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td @@ -39,7 +39,7 @@ def Merge2AttrsToArray : NativeCodeCall<"$_builder.getArrayAttr({$0, $1})">; // Use the tensor type information from $0 and convert min $1, max $2 and // numBits $3 and narrowRange $4 to a QuantizedType. def ConvertToQuantTypeFromAttrs : NativeCodeCall< - "GetQuantizedTypeAttr($_builder, $0->getType(), $1, $2, -1, $3, $4, /*is_signed=*/false)">; + "GetQuantizedTypeAttr($_builder, $0.getType(), $1, $2, -1, $3, $4, /*is_signed=*/false)">; // Converts an integer attribute $0 to 32-bit with builder. def convertIntAttrTo32Bit : NativeCodeCall< @@ -49,6 +49,11 @@ def convertIntAttrTo32Bit : NativeCodeCall< def ExtractSingleElementAsInteger : NativeCodeCall< "ExtractSingleElementAsInteger($_self.cast<ElementsAttr>())">; +// Checks whether the given operation has static shapes and same shapes of all inputs. +def HasSameStaticShapesPred : CPred<"HasSameStaticShapes($0.getDefiningOp())">; +def HasSameStaticShapes : Constraint<HasSameStaticShapesPred, "op must have static same input shapes">; +def HasNotSameStaticShapes : Constraint<Neg<HasSameStaticShapesPred>, "op must have not static same input shapes">; + //===----------------------------------------------------------------------===// // Nullary ops patterns. 
//===----------------------------------------------------------------------===// @@ -145,10 +150,9 @@ def : Pat<(TF_RoundOp $arg), (TFL_RoundOp $arg)>; def : Pat<(TF_RsqrtOp $arg), (TFL_RsqrtOp $arg)>; def : Pat<(TF_SqrtOp $arg), (TFL_SqrtOp $arg)>; def : Pat<(TF_SquareOp $arg), (TFL_SquareOp $arg)>; -// TODO(jpienaar): this is not true for all selects, TF's select supports rank 0 -// condition def : Pat<(TF_SelectOp $cond, $x, $y), (TFL_SelectOp $cond, $x, $y)>; -def : Pat<(TF_SelectV2Op $cond, $x, $y), (TFL_SelectOp $cond, $x, $y)>; +def : Pat<(TF_SelectV2Op:$src_op $cond, $x, $y), (TFL_SelectOp $cond, $x, $y), [(HasSameStaticShapes $src_op)]>; +def : Pat<(TF_SelectV2Op:$src_op $cond, $x, $y), (TFL_SelectV2Op $cond, $x, $y), [(HasNotSameStaticShapes $src_op)]>; def : Pat<(TF_ShapeOp $arg), (TFL_ShapeOp $arg)>; def : Pat<(TF_SigmoidOp $arg), (TFL_LogisticOp $arg)>; def : Pat<(TF_SinOp F32Tensor:$arg), (TFL_SinOp $arg)>; diff --git a/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc index 698ba4d4483..5513f2ad546 100644 --- a/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc @@ -28,15 +28,15 @@ limitations under the License. #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringSwitch.h" -#include "mlir/Dialect/QuantOps/FakeQuantSupport.h" // TF:local_config_mlir -#include "mlir/Dialect/QuantOps/UniformSupport.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Support/Functional.h" // TF:local_config_mlir -#include "mlir/Support/LLVM.h" // TF:local_config_mlir +#include "mlir/Dialect/QuantOps/FakeQuantSupport.h" // TF:llvm-project +#include "mlir/Dialect/QuantOps/UniformSupport.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/PatternMatch.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Support/Functional.h" // TF:llvm-project +#include "mlir/Support/LLVM.h" // TF:llvm-project #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" #include "tensorflow/compiler/mlir/lite/quantization/quantization_utils.h" #include "tensorflow/compiler/mlir/lite/transforms/passes.h" @@ -66,6 +66,28 @@ struct LegalizeTF : public FunctionPass { void runOnFunction() override; }; +// Returns true if all tensor values in `values` have static shapes and share the same shape. 
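+// For example: {tensor<2x3xf32>, tensor<2x3xf32>} -> true;
+// {tensor<?x3xf32>, tensor<2x3xf32>} -> false (dynamic dimension);
+// {tensor<2x3xf32>, tensor<3x2xf32>} -> false (mismatched shapes).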
+bool HasSameStaticShapes(Operation* op) { + auto values = op->getOperands(); + int index = 0; + ArrayRef<int64_t> shape; + for (Value value : values) { + auto shaped_type = value.getType().dyn_cast<ShapedType>(); + if (!shaped_type || !shaped_type.hasStaticShape()) { + return false; + } + if (index == 0) { + shape = shaped_type.getShape(); + } else { + if (shape != shaped_type.getShape()) { + return false; + } + } + ++index; + } + return true; +} + #include "tensorflow/compiler/mlir/lite/transforms/generated_legalize_tf.inc" #define DECL_CONVERT_OP(tf_op) \ @@ -100,7 +122,7 @@ PatternMatchResult ConvertTFConcatOp::matchAndRewrite( auto tf_concat_op = cast(op); auto values = tf_concat_op.values(); - auto output_type = tf_concat_op.output()->getType(); + auto output_type = tf_concat_op.output().getType(); // Extract axis attribute from constant concat_dims tensor ElementsAttr axis; if (!matchPattern(tf_concat_op.concat_dim(), m_Constant(&axis))) @@ -119,7 +141,7 @@ PatternMatchResult ConvertTFConcatV2Op::matchAndRewrite( auto tf_concat_op = cast(op); auto values = tf_concat_op.values(); - auto output_type = tf_concat_op.output()->getType(); + auto output_type = tf_concat_op.output().getType(); // Extract axis attribute from constant axis tensor ElementsAttr axis; if (!matchPattern(tf_concat_op.axis(), m_Constant(&axis))) @@ -145,7 +167,7 @@ PatternMatchResult ConvertTFMatMulOp::matchAndRewrite( if (tf_matmul_op.transpose_a()) return matchFailure(); if (!tf_matmul_op.transpose_b()) return matchFailure(); - Type output_type = tf_matmul_op.getResult()->getType(); + Type output_type = tf_matmul_op.getResult().getType(); // TODO(jpienaar): Follow up post shuffle discussion. auto no_input = rewriter.create( op->getLoc(), rewriter.getNoneType(), rewriter.getUnitAttr()); @@ -161,8 +183,8 @@ PatternMatchResult ConvertTFPackOp::matchAndRewrite( Operation* op, PatternRewriter& rewriter) const { auto tf_pack_op = cast(op); - SmallVector values(tf_pack_op.values()); - auto output_type = tf_pack_op.output()->getType(); + SmallVector values(tf_pack_op.values()); + auto output_type = tf_pack_op.output().getType(); auto values_count = rewriter.getI32IntegerAttr(tf_pack_op.N()); // Axis can be negative. auto axis = rewriter.getI32IntegerAttr(tf_pack_op.axis().getSExtValue()); @@ -176,10 +198,10 @@ PatternMatchResult ConvertTFReshapeOp::matchAndRewrite( Operation* op, PatternRewriter& rewriter) const { auto tf_reshape_op = cast(op); - auto* input = tf_reshape_op.tensor(); - auto* shape = tf_reshape_op.shape(); + auto input = tf_reshape_op.tensor(); + auto shape = tf_reshape_op.shape(); - ShapedType shape_type = shape->getType().cast(); + ShapedType shape_type = shape.getType().cast(); // The tfl reshape's #2 operand needs to i32 tensor type, so we have to cast. if (!shape_type.getElementType().isInteger(32)) { auto new_shape = shape_type.getShape(); @@ -191,7 +213,7 @@ PatternMatchResult ConvertTFReshapeOp::matchAndRewrite( rewriter.getBoolAttr(false)) .y(); } - rewriter.replaceOpWithNewOp(op, tf_reshape_op.output()->getType(), + rewriter.replaceOpWithNewOp(op, tf_reshape_op.output().getType(), input, shape); return matchSuccess(); } @@ -200,7 +222,7 @@ PatternMatchResult ConvertTFSplitOp::matchAndRewrite( Operation* op, PatternRewriter& rewriter) const { auto tf_split_op = cast(op); - auto output_types = functional::map([](Value* v) { return v->getType(); }, + auto output_types = functional::map([](Value v) { return v.getType(); }, tf_split_op.output()); // Number of splits cannot be negative. 
auto num_split = rewriter.getI32IntegerAttr(tf_split_op.num_split()); @@ -215,7 +237,7 @@ PatternMatchResult ConvertTFSplitVOp::matchAndRewrite( Operation* op, PatternRewriter& rewriter) const { auto tf_splitv_op = cast(op); - auto output_types = functional::map([](Value* v) { return v->getType(); }, + auto output_types = functional::map([](Value v) { return v.getType(); }, tf_splitv_op.output()); // Number of splits cannot be negative. auto num_split = rewriter.getI32IntegerAttr(tf_splitv_op.num_split()); @@ -226,13 +248,13 @@ PatternMatchResult ConvertTFSplitVOp::matchAndRewrite( return matchSuccess(); } -Value* PadStridedSliceAttributeArray(Operation* op, PatternRewriter& rewriter, - Value* attribute, - ArrayRef padding_val, int* mask) { +Value PadStridedSliceAttributeArray(Operation* op, PatternRewriter& rewriter, + Value attribute, + ArrayRef padding_val, int* mask) { DenseIntElementsAttr dense_elem_attr; SmallVector padded_val; - auto ranked_attr_type = attribute->getType().dyn_cast(); + auto ranked_attr_type = attribute.getType().dyn_cast(); if (!ranked_attr_type || !matchPattern(attribute, m_Constant(&dense_elem_attr))) { // If the input attribute is neither ranked type nor constant, we @@ -258,14 +280,14 @@ PatternMatchResult ConvertTFStridedSliceOp::matchAndRewrite( Operation* op, PatternRewriter& rewriter) const { auto tf_strided_slice_op = cast(op); auto ranked_input_type = - tf_strided_slice_op.input()->getType().dyn_cast(); + tf_strided_slice_op.input().getType().dyn_cast(); if (!ranked_input_type) { // If input is not a ranked tensor, we can't deduce the padding dimensions // from it, so we just do a plain conversion here. rewriter.replaceOpWithNewOp( - op, tf_strided_slice_op.output()->getType(), - tf_strided_slice_op.input(), tf_strided_slice_op.begin(), - tf_strided_slice_op.end(), tf_strided_slice_op.strides(), + op, tf_strided_slice_op.output().getType(), tf_strided_slice_op.input(), + tf_strided_slice_op.begin(), tf_strided_slice_op.end(), + tf_strided_slice_op.strides(), rewriter.getI32IntegerAttr( tf_strided_slice_op.begin_mask().getSExtValue()), rewriter.getI32IntegerAttr( @@ -283,20 +305,20 @@ PatternMatchResult ConvertTFStridedSliceOp::matchAndRewrite( // Pad `begin` array with zero values and update the `begin_mask`. SmallVector begin_pad_val(num_input_dims, 0); int begin_mask = tf_strided_slice_op.begin_mask().getSExtValue(); - Value* padded_begin = PadStridedSliceAttributeArray( + Value padded_begin = PadStridedSliceAttributeArray( op, rewriter, tf_strided_slice_op.begin(), begin_pad_val, &begin_mask); // Pad `end` array with `input_shape` and update the `end_mask`. int end_mask = tf_strided_slice_op.end_mask().getSExtValue(); auto input_shape = ranked_input_type.getShape(); SmallVector end_pad_val(input_shape.begin(), input_shape.end()); - Value* padded_end = PadStridedSliceAttributeArray( + Value padded_end = PadStridedSliceAttributeArray( op, rewriter, tf_strided_slice_op.end(), end_pad_val, &end_mask); // Pad `strides` array with ones. 
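// Worked example (illustrative) for a rank-4 input of shape [N, H, W, C]:
// begin = [1, 2] pads to [1, 2, 0, 0], end = [3, 4] pads to [3, 4, W, C]
// (the trailing input dims), and strides = [1, 1] pads to [1, 1, 1, 1];
// the begin/end masks are extended so the padded dims span their full range.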
SmallVector strides_pad_val(num_input_dims, 1); - Value* padded_strides = PadStridedSliceAttributeArray( + Value padded_strides = PadStridedSliceAttributeArray( op, rewriter, tf_strided_slice_op.strides(), strides_pad_val, nullptr); rewriter.replaceOpWithNewOp( - op, tf_strided_slice_op.output()->getType(), tf_strided_slice_op.input(), + op, tf_strided_slice_op.output().getType(), tf_strided_slice_op.input(), padded_begin, padded_end, padded_strides, rewriter.getI32IntegerAttr(begin_mask), rewriter.getI32IntegerAttr(end_mask), @@ -313,8 +335,8 @@ PatternMatchResult ConvertTFUnpackOp::matchAndRewrite( Operation* op, PatternRewriter& rewriter) const { auto tf_unpack_op = cast(op); - auto* input = tf_unpack_op.value(); - auto output_types = functional::map([](Value* v) { return v->getType(); }, + auto input = tf_unpack_op.value(); + auto output_types = functional::map([](Value v) { return v.getType(); }, tf_unpack_op.output()); auto num = rewriter.getI32IntegerAttr(tf_unpack_op.num()); // Axis can be negative. @@ -338,7 +360,7 @@ bool ConvertTFMatrixDiagV2orV3(Operation* op, PatternRewriter* rewriter) { if (tf_matrix_diag_v2_or_v3_op.getNumOperands() != 5) return false; auto input = tf_matrix_diag_v2_or_v3_op.diagonal(); - auto output_type = tf_matrix_diag_v2_or_v3_op.output()->getType(); + auto output_type = tf_matrix_diag_v2_or_v3_op.output().getType(); // Extract k constant tensor and check value = 0. ElementsAttr k; @@ -478,7 +500,7 @@ PatternMatchResult ConvertTFReciprocalOp::matchAndRewrite( auto status_or_const_op = CreateConstOpWithSingleValue( &rewriter, op->getLoc(), - tf_reciprocal_op.x()->getType().cast(), 1); + tf_reciprocal_op.x().getType().cast(), 1); if (!status_or_const_op.ok()) { return matchFailure(); } diff --git a/tensorflow/compiler/mlir/lite/transforms/load_quantization_recipe.cc b/tensorflow/compiler/mlir/lite/transforms/load_quantization_recipe.cc index f1668b0ffb9..3349261af02 100644 --- a/tensorflow/compiler/mlir/lite/transforms/load_quantization_recipe.cc +++ b/tensorflow/compiler/mlir/lite/transforms/load_quantization_recipe.cc @@ -19,11 +19,11 @@ limitations under the License. #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" -#include "mlir/Dialect/QuantOps/QuantTypes.h" // TF:local_config_mlir -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir +#include "mlir/Dialect/QuantOps/QuantTypes.h" // TF:llvm-project +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" #include "tensorflow/compiler/mlir/lite/quantization/quantization_utils.h" #include "tensorflow/compiler/mlir/lite/transforms/passes.h" @@ -50,13 +50,13 @@ struct LoadQuantizationRecipe : public FunctionPass { // Create LSTM gates with different weights for input, recurrent and // cell state, and also the layer normalization parameters. 
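// Schematically, CreateGate below computes
//   gate = LayerNorm(FC(in, in_w) + FC(rec, rec_w) [+ cell * cell_w]),
// where the elementwise cell term participates only when `cell` is provided.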
- Operation* CreateGate(Location loc, Value* in, Value* in_w, Value* rec, - Value* rec_w, - llvm::Optional> cell, - Value* ln_w, Value* ln_bias, OpBuilder* builder); + Operation* CreateGate(Location loc, Value in, Value in_w, Value rec, + Value rec_w, + llvm::Optional> cell, + Value ln_w, Value ln_bias, OpBuilder* builder); - Operation* CreateLayerNorm(Location loc, Value* in, Value* ln_w, - Value* ln_bias, OpBuilder* builder); + Operation* CreateLayerNorm(Location loc, Value in, Value ln_w, Value ln_bias, + OpBuilder* builder); // Add the internal implementation of the LSTM to its regions. void LoadForLSTMOp(LSTMOp lstm, OpBuilder* builder); @@ -71,7 +71,7 @@ struct LoadQuantizationRecipe : public FunctionPass { void LoadQuantizationRecipe::Initialize(LSTMOp lstm, OpBuilder* builder) { Type expressed_type = - lstm.input()->getType().cast().getElementType(); + lstm.input().getType().cast().getElementType(); Type int8_storage_type = builder->getIntegerType(8); Type int16_storage_type = builder->getIntegerType(16); auto flag = quant::QuantizationFlags::FlagValue::Signed; @@ -88,12 +88,12 @@ void LoadQuantizationRecipe::Initialize(LSTMOp lstm, OpBuilder* builder) { auto any_int16 = quant::AnyQuantizedType::get( flag, int16_storage_type, expressed_type, int16_min, int16_max); - int8 = any_int8.castFromExpressedType(lstm.input()->getType()); - int16 = any_int16.castFromExpressedType(lstm.input()->getType()); + int8 = any_int8.castFromExpressedType(lstm.input().getType()); + int16 = any_int16.castFromExpressedType(lstm.input().getType()); } -Operation* LoadQuantizationRecipe::CreateLayerNorm(Location loc, Value* in, - Value* ln_w, Value* ln_bias, +Operation* LoadQuantizationRecipe::CreateLayerNorm(Location loc, Value in, + Value ln_w, Value ln_bias, OpBuilder* builder) { // Note that l2_normalization and add ops here are not the execution kernel // implementation for layer_normalization and we just want to use them to @@ -105,8 +105,8 @@ Operation* LoadQuantizationRecipe::CreateLayerNorm(Location loc, Value* in, } Operation* LoadQuantizationRecipe::CreateGate( - Location loc, Value* in, Value* in_w, Value* rec, Value* rec_w, - llvm::Optional> cell, Value* ln_w, Value* ln_bias, + Location loc, Value in, Value in_w, Value rec, Value rec_w, + llvm::Optional> cell, Value ln_w, Value ln_bias, OpBuilder* builder) { auto s1 = builder->create(loc, int16, in, in_w, none_cst, none_af, fc_format, keep_dims); @@ -119,13 +119,13 @@ Operation* LoadQuantizationRecipe::CreateGate( cell.getValue().second, none_af); s4 = builder->create( loc, int16, - llvm::ArrayRef( + llvm::ArrayRef( {*s1.output().begin(), *s2.output().begin(), s3.output()})); } else { s4 = builder->create( loc, int16, - llvm::ArrayRef({*s1.output().begin(), *s2.output().begin()})); + llvm::ArrayRef({*s1.output().begin(), *s2.output().begin()})); } auto s5 = CreateLayerNorm(loc, s4.sum(), ln_w, ln_bias, builder); @@ -144,22 +144,20 @@ void LoadQuantizationRecipe::LoadForLSTMOp(LSTMOp lstm, OpBuilder* builder) { region.push_back(new Block); builder->setInsertionPointToEnd(®ion.front()); Location loc = lstm.getLoc(); - Type int32_type = builder->getIntegerType(32); - Type int32_tensor = UnrankedTensorType::get(int32_type); none_cst = builder->create(loc, builder->getNoneType(), builder->getUnitAttr()); auto input_gate = CreateGate( loc, lstm.input(), lstm.input_to_input_weights(), lstm.input_activation_state(), lstm.recurrent_to_input_weights(), - llvm::Optional>( + llvm::Optional>( {lstm.input_cell_state(), lstm.cell_to_input_weights()}), 
       lstm.input_layer_norm_coefficients(), lstm.input_gate_bias(), builder);
 
   auto forget_gate = CreateGate(
       loc, lstm.input(), lstm.input_to_forget_weights(),
       lstm.input_activation_state(), lstm.recurrent_to_forget_weights(),
-      llvm::Optional<std::pair<Value*, Value*>>(
+      llvm::Optional<std::pair<Value, Value>>(
           {lstm.input_cell_state(), lstm.cell_to_forget_weights()}),
       lstm.forget_layer_norm_coefficients(), lstm.forget_gate_bias(), builder);
@@ -179,7 +177,7 @@ void LoadQuantizationRecipe::LoadForLSTMOp(LSTMOp lstm, OpBuilder* builder) {
   auto output_gate = CreateGate(
       loc, lstm.input(), lstm.input_to_output_weights(),
       lstm.input_activation_state(), lstm.recurrent_to_output_weights(),
-      llvm::Optional<std::pair<Value*, Value*>>(
+      llvm::Optional<std::pair<Value, Value>>(
           {new_cell, lstm.cell_to_output_weights()}),
       lstm.output_layer_norm_coefficients(), lstm.output_gate_bias(), builder);
diff --git a/tensorflow/compiler/mlir/lite/transforms/lower_static_tensor_list.cc b/tensorflow/compiler/mlir/lite/transforms/lower_static_tensor_list.cc
index 7c02342eedd..bc8d9162b78 100644
--- a/tensorflow/compiler/mlir/lite/transforms/lower_static_tensor_list.cc
+++ b/tensorflow/compiler/mlir/lite/transforms/lower_static_tensor_list.cc
@@ -29,28 +29,28 @@ limitations under the License.
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Debug.h"
-#include "mlir/Analysis/LoopAnalysis.h"  // TF:local_config_mlir
-#include "mlir/Dialect/StandardOps/Ops.h"  // TF:local_config_mlir
-#include "mlir/IR/Attributes.h"  // TF:local_config_mlir
-#include "mlir/IR/Block.h"  // TF:local_config_mlir
-#include "mlir/IR/Function.h"  // TF:local_config_mlir
-#include "mlir/IR/MLIRContext.h"  // TF:local_config_mlir
-#include "mlir/IR/Matchers.h"  // TF:local_config_mlir
-#include "mlir/IR/Module.h"  // TF:local_config_mlir
-#include "mlir/IR/Operation.h"  // TF:local_config_mlir
-#include "mlir/IR/OperationSupport.h"  // TF:local_config_mlir
-#include "mlir/IR/PatternMatch.h"  // TF:local_config_mlir
-#include "mlir/IR/StandardTypes.h"  // TF:local_config_mlir
-#include "mlir/IR/SymbolTable.h"  // TF:local_config_mlir
-#include "mlir/IR/TypeUtilities.h"  // TF:local_config_mlir
-#include "mlir/IR/Types.h"  // TF:local_config_mlir
-#include "mlir/IR/Value.h"  // TF:local_config_mlir
-#include "mlir/Pass/Pass.h"  // TF:local_config_mlir
-#include "mlir/Pass/PassRegistry.h"  // TF:local_config_mlir
-#include "mlir/Support/Functional.h"  // TF:local_config_mlir
-#include "mlir/Support/LLVM.h"  // TF:local_config_mlir
-#include "mlir/Support/LogicalResult.h"  // TF:local_config_mlir
-#include "mlir/Transforms/DialectConversion.h"  // TF:local_config_mlir
+#include "mlir/Analysis/LoopAnalysis.h"  // TF:llvm-project
+#include "mlir/Dialect/StandardOps/Ops.h"  // TF:llvm-project
+#include "mlir/IR/Attributes.h"  // TF:llvm-project
+#include "mlir/IR/Block.h"  // TF:llvm-project
+#include "mlir/IR/Function.h"  // TF:llvm-project
+#include "mlir/IR/MLIRContext.h"  // TF:llvm-project
+#include "mlir/IR/Matchers.h"  // TF:llvm-project
+#include "mlir/IR/Module.h"  // TF:llvm-project
+#include "mlir/IR/Operation.h"  // TF:llvm-project
+#include "mlir/IR/OperationSupport.h"  // TF:llvm-project
+#include "mlir/IR/PatternMatch.h"  // TF:llvm-project
+#include "mlir/IR/StandardTypes.h"  // TF:llvm-project
+#include "mlir/IR/SymbolTable.h"  // TF:llvm-project
+#include "mlir/IR/TypeUtilities.h"  // TF:llvm-project
+#include "mlir/IR/Types.h"  // TF:llvm-project
+#include "mlir/IR/Value.h"  // TF:llvm-project
+#include "mlir/Pass/Pass.h"  // TF:llvm-project
+#include "mlir/Pass/PassRegistry.h"  // TF:llvm-project
+#include "mlir/Support/Functional.h"  // TF:llvm-project
+#include "mlir/Support/LLVM.h"  // TF:llvm-project
+#include "mlir/Support/LogicalResult.h"  // TF:llvm-project
+#include "mlir/Transforms/DialectConversion.h"  // TF:llvm-project
 #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h"
 #include "tensorflow/compiler/mlir/lite/transforms/passes.h"
 #include "tensorflow/compiler/mlir/lite/utils/attribute_utils.h"
@@ -84,8 +84,8 @@ struct LowerStaticTensorListPass
       TensorListPatternRewriter *rewriter);
 };
 
-Value *CreateI32SplatConst(Location loc, PatternRewriter *rewriter,
-                           ArrayRef<int64_t> shape, int32_t val) {
+Value CreateI32SplatConst(Location loc, PatternRewriter *rewriter,
+                          ArrayRef<int64_t> shape, int32_t val) {
   RankedTensorType type =
       RankedTensorType::get(shape, rewriter->getIntegerType(32));
   DenseElementsAttr attr =
@@ -93,9 +93,9 @@ Value *CreateI32SplatConst(Location loc, PatternRewriter *rewriter,
   return rewriter->create<ConstantOp>(loc, type, attr);
 }
 
-Value *CreateI32SplatTensor(Location loc, PatternRewriter *rewriter,
-                            Value *shape_tensor, int32_t val) {
-  Value *scalar_val = CreateI32SplatConst(loc, rewriter, {}, val);
+Value CreateI32SplatTensor(Location loc, PatternRewriter *rewriter,
+                           Value shape_tensor, int32_t val) {
+  Value scalar_val = CreateI32SplatConst(loc, rewriter, {}, val);
   return rewriter->create<TF::FillOp>(
       loc, RankedTensorType::get({-1}, rewriter->getIntegerType(32)),
       shape_tensor, scalar_val);
@@ -131,32 +131,32 @@ Type GetTensorTypeForTensorList(Type element_type, TF::VariantType handle_dtype,
 // Requires that `start_index` and `size` are scalar tensors and
 // `item_position_shape` is a 1-D tensor with only one element equal to the rank
 // of an item in the tensorlist.
-TF::SliceOp CreateSliceOpForTensorList(Location loc, Value *input_list,
-                                       Value *start_index, Value *size,
-                                       Value *item_rank, Type result_type,
+TF::SliceOp CreateSliceOpForTensorList(Location loc, Value input_list,
+                                       Value start_index, Value size,
+                                       Value item_rank, Type result_type,
                                        PatternRewriter *rewriter) {
   // Create the start position of slice. This is done by concatenating
   // `start_index` and `partial_start_position` together.
   IntegerType shape_dtype = rewriter->getIntegerType(32);
   RankedTensorType position_type = RankedTensorType::get({-1}, shape_dtype);
-  Value *partial_start_position =
+  Value partial_start_position =
       CreateI32SplatTensor(loc, rewriter, item_rank, 0);
-  Value *scalar_zero = CreateI32SplatConst(loc, rewriter, {}, 0);
+  Value scalar_zero = CreateI32SplatConst(loc, rewriter, {}, 0);
   RankedTensorType vector_type = RankedTensorType::get({1}, shape_dtype);
   auto expanded_start_index = rewriter->create<TF::ExpandDimsOp>(
       loc, vector_type, start_index, scalar_zero);
   auto start_position = rewriter->create<TF::ConcatOp>(
       loc, position_type, scalar_zero,
-      ArrayRef<Value *>({expanded_start_index, partial_start_position}));
+      ArrayRef<Value>({expanded_start_index, partial_start_position}));
 
   // Create the slice size tensor. This is done by concatenating `size` and
   // `partial_size`.
auto size_leading_dim = rewriter->create(loc, vector_type, size, scalar_zero); - Value *partial_size = CreateI32SplatTensor(loc, rewriter, item_rank, -1); + Value partial_size = CreateI32SplatTensor(loc, rewriter, item_rank, -1); auto slice_size = rewriter->create( loc, position_type, scalar_zero, - ArrayRef({size_leading_dim, partial_size})); + ArrayRef({size_leading_dim, partial_size})); return rewriter->create(loc, result_type, input_list, start_position, slice_size); @@ -180,31 +180,31 @@ struct ConvertTensorListSetItem : public ConversionPattern { // 0), [-1, -1, ...])), (ExpandDims $item, expand_dim = 0), (Slice // $input, [$index + 1, 0, 0, ...], [-1, -1, ...]))>; PatternMatchResult matchAndRewrite( - Operation *operation, ArrayRef operands, + Operation *operation, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { auto op = llvm::cast(operation); Location loc = op.getLoc(); - Value *input = operands[0]; - Value *index = operands[1]; - Value *item = operands[2]; + Value input = operands[0]; + Value index = operands[1]; + Value item = operands[2]; IntegerType shape_dtype = rewriter.getIntegerType(32); auto item_rank = rewriter.create( loc, RankedTensorType::get({}, shape_dtype), item); - Value *scalar_zero = CreateI32SplatConst(loc, &rewriter, {}, 0); + Value scalar_zero = CreateI32SplatConst(loc, &rewriter, {}, 0); // Calculate `index` + 1, which is used to generate the start position for // the second slice op. auto suffix_start = - rewriter.create(loc, index->getType(), index, + rewriter.create(loc, index.getType(), index, CreateI32SplatConst(loc, &rewriter, {}, 1)); auto item_position_shape = rewriter.create( loc, RankedTensorType::get({1}, shape_dtype), item_rank, scalar_zero); // Create two slice ops. - Type element_type = input->getType().cast().getElementType(); + Type element_type = input.getType().cast().getElementType(); UnrankedTensorType unranked_tensor = UnrankedTensorType::get(element_type); - Value *scalar_minus_one = CreateI32SplatConst(loc, &rewriter, {}, -1); + Value scalar_minus_one = CreateI32SplatConst(loc, &rewriter, {}, -1); TF::SliceOp slice1 = CreateSliceOpForTensorList(loc, /*input_list=*/input, /*start_index=*/scalar_zero, @@ -225,8 +225,8 @@ struct ConvertTensorListSetItem : public ConversionPattern { // Concatenate three parts together to generate the final result. rewriter.replaceOpWithNewOp( - op, input->getType(), scalar_zero, - ArrayRef({slice1, expanded_item, slice2})); + op, input.getType(), scalar_zero, + ArrayRef({slice1, expanded_item, slice2})); return matchSuccess(); } }; @@ -241,14 +241,14 @@ struct ConvertTensorListInitOp : public ConversionPattern { // Create and return a 1-d tensor with exactly one element equal to the number // of list elements to initialize the output tensor list with. - virtual Value *GetNumElements(OpT op, ArrayRef operands, - PatternRewriter *rewriter) const = 0; + virtual Value GetNumElements(OpT op, ArrayRef operands, + PatternRewriter *rewriter) const = 0; // Rewrites the original op into `tf.fill`. The result tensor shape is // [num_element, element_shape]. All the values in the result tensor will be // initialized to 0. 
PatternMatchResult matchAndRewrite( - Operation *operation, ArrayRef operands, + Operation *operation, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { OpT op = llvm::cast(operation); @@ -263,8 +263,8 @@ struct ConvertTensorListInitOp : public ConversionPattern { return matchFailure(); } - Value *element_shape = operands[0]; - Type shape_dtype = getElementTypeOrSelf(element_shape->getType()); + Value element_shape = operands[0]; + Type shape_dtype = getElementTypeOrSelf(element_shape.getType()); DenseIntElementsAttr dense_elem_attr; if (matchPattern(element_shape, m_Constant(&dense_elem_attr))) { @@ -297,11 +297,10 @@ struct ConvertTensorListInitOp : public ConversionPattern { new_element_shape_values.push_back(dim_value); } - auto attr = - DenseIntElementsAttr::get(element_shape->getType().cast(), - new_element_shape_values); + auto attr = DenseIntElementsAttr::get( + element_shape.getType().cast(), new_element_shape_values); auto new_element_shape = rewriter.create( - op.getLoc(), element_shape->getType(), attr); + op.getLoc(), element_shape.getType(), attr); element_shape = new_element_shape; } @@ -330,11 +329,11 @@ struct ConvertTensorListInitOp : public ConversionPattern { Location loc = op.getLoc(); // Add number of elements as the prefix to the element shape to get shape of // the output tensor. - Value *leading_dim = GetNumElements(op, operands, &rewriter); - Value *scalar_zero = CreateI32SplatConst(loc, &rewriter, {}, 0); + Value leading_dim = GetNumElements(op, operands, &rewriter); + Value scalar_zero = CreateI32SplatConst(loc, &rewriter, {}, 0); auto list_shape = rewriter.create( loc, shape_type, scalar_zero, - ArrayRef({leading_dim, element_shape})); + ArrayRef({leading_dim, element_shape})); // Create a zero-initialized constant tensor that has the same type // as specified by element_dtype. 
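
Per the comment above, both tensor-list init ops become one zero-filled tensor of shape [num_elements, element_shape...]. A host-side analogue of that shape-and-fill computation, assuming a hypothetical DenseList container (a sketch of the idea, not the rewriter logic itself):

    #include <cstdint>
    #include <functional>
    #include <numeric>
    #include <vector>

    struct DenseList {
      std::vector<int64_t> shape;
      std::vector<float> data;
    };

    // num_elements items, each of element_shape, flattened into a single
    // dense tensor of zeros -- the analogue of the tf.fill the pattern emits.
    DenseList InitTensorList(int64_t num_elements,
                             const std::vector<int64_t>& element_shape) {
      DenseList list;
      list.shape.push_back(num_elements);  // leading_dim
      list.shape.insert(list.shape.end(), element_shape.begin(),
                        element_shape.end());
      int64_t total = std::accumulate(list.shape.begin(), list.shape.end(),
                                      int64_t{1}, std::multiplies<int64_t>());
      list.data.assign(total, 0.0f);  // zero-initialized, like the fill value
      return list;
    }
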
@@ -352,11 +351,11 @@ struct ConvertTensorListReserve explicit ConvertTensorListReserve(MLIRContext *context) : ConvertTensorListInitOp(context) {} - Value *GetNumElements(TF::TensorListReserveOp op, ArrayRef operands, - PatternRewriter *rewriter) const override { - Value *scalar_zero = CreateI32SplatConst(op.getLoc(), rewriter, {}, 0); - Type shape_dtype = getElementTypeOrSelf(op.element_shape()->getType()); - Value *num_elements = operands[1]; + Value GetNumElements(TF::TensorListReserveOp op, ArrayRef operands, + PatternRewriter *rewriter) const override { + Value scalar_zero = CreateI32SplatConst(op.getLoc(), rewriter, {}, 0); + Type shape_dtype = getElementTypeOrSelf(op.element_shape().getType()); + Value num_elements = operands[1]; return rewriter->create( op.getLoc(), RankedTensorType::get({1}, shape_dtype), num_elements, scalar_zero); @@ -371,8 +370,8 @@ struct ConvertEmptyTensorList explicit ConvertEmptyTensorList(MLIRContext *context) : ConvertTensorListInitOp(context) {} - Value *GetNumElements(TF::EmptyTensorListOp op, ArrayRef operands, - PatternRewriter *rewriter) const override { + Value GetNumElements(TF::EmptyTensorListOp op, ArrayRef operands, + PatternRewriter *rewriter) const override { return CreateI32SplatConst(op.getLoc(), rewriter, {1}, 0); } }; @@ -383,23 +382,23 @@ struct ConvertTensorListPushBack : public ConversionPattern { context) {} PatternMatchResult matchAndRewrite( - Operation *op, ArrayRef operands, + Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { TF::TensorListPushBackOp push_back_op = cast(op); - Value *input_handle = operands[0]; - Value *item = operands[1]; + Value input_handle = operands[0]; + Value item = operands[1]; // Expand the shape of the item so that it will have rank same as the input // tensor and it is compatible for the Concat Op. Type expanded_item_type = - PrependLeadingDimIfRanked(1, item->getType(), &rewriter); - Value *scalar_zero = CreateI32SplatConst(op->getLoc(), &rewriter, {}, 0); + PrependLeadingDimIfRanked(1, item.getType(), &rewriter); + Value scalar_zero = CreateI32SplatConst(op->getLoc(), &rewriter, {}, 0); auto expanded_item = rewriter.create( op->getLoc(), expanded_item_type, item, scalar_zero); Type elem_type = getElementTypeOrSelf(item); auto handle_dtype = - getElementTypeOrSelf(push_back_op.output_handle()->getType()) + getElementTypeOrSelf(push_back_op.output_handle().getType()) .cast(); Type result_type = GetTensorTypeForTensorList(elem_type, handle_dtype, &rewriter); @@ -408,7 +407,7 @@ struct ConvertTensorListPushBack : public ConversionPattern { // get a tensor equivalent to the TensorList generated by this op. rewriter.replaceOpWithNewOp( push_back_op, result_type, scalar_zero, - ArrayRef({input_handle, expanded_item})); + ArrayRef({input_handle, expanded_item})); return matchSuccess(); } }; @@ -429,14 +428,14 @@ struct ConvertTensorListResize : public ConversionPattern { context) {} PatternMatchResult matchAndRewrite( - Operation *op, ArrayRef operands, + Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { TF::TensorListResizeOp resize_op = cast(op); - Value *input_handle = operands[0]; - Value *size = operands[1]; + Value input_handle = operands[0]; + Value size = operands[1]; Location loc = resize_op.getLoc(); - Value *scalar_zero = CreateI32SplatConst(loc, &rewriter, {}, 0); + Value scalar_zero = CreateI32SplatConst(loc, &rewriter, {}, 0); // Compute the input tensorlist's length and store it in `input_size`. 
IntegerType shape_dtype = rewriter.getIntegerType(32); @@ -446,7 +445,7 @@ struct ConvertTensorListResize : public ConversionPattern { // Infer result type of this op based on TF's shape inference result. Type elem_type = getElementTypeOrSelf(input_handle); auto handle_dtype = - getElementTypeOrSelf(resize_op.output_handle()->getType()) + getElementTypeOrSelf(resize_op.output_handle().getType()) .cast(); Type result_type = GetTensorTypeForTensorList(elem_type, handle_dtype, &rewriter); @@ -463,8 +462,8 @@ struct ConvertTensorListResize : public ConversionPattern { auto input_shape = rewriter.create( loc, RankedTensorType::get({-1}, shape_dtype), input_handle); - Type branch_args_type[] = {input_handle->getType(), input_shape.getType(), - size_diff.getType(), size->getType()}; + Type branch_args_type[] = {input_handle.getType(), input_shape.getType(), + size_diff.getType(), size.getType()}; Type branch_result_type[] = {result_type}; auto func_type = FunctionType::get(branch_args_type, branch_result_type, rewriter.getContext()); @@ -491,7 +490,7 @@ struct ConvertTensorListResize : public ConversionPattern { rewriter.replaceOpWithNewOp( op, result_type, if_cond, /*input=*/ - ArrayRef({input_handle, input_shape, size_diff, size}), + ArrayRef({input_handle, input_shape, size_diff, size}), /*then_branch=*/rewriter.getSymbolRefAttr(then_branch_op), /*else_branch=*/rewriter.getSymbolRefAttr(else_branch_op), /*output_shapes=*/rewriter.getStrArrayAttr({"{}"}), @@ -517,14 +516,14 @@ struct ConvertTensorListResize : public ConversionPattern { Location loc = resize_op.getLoc(); // Get the element shape by slicing from index 1 in the input shape. - Value *slice_size = CreateI32SplatConst(loc, rewriter, {1}, -1); - Value *scalar_zero = CreateI32SplatConst(loc, rewriter, {}, 0); - Value *slice_start = CreateI32SplatConst(loc, rewriter, {1}, 1); + Value slice_size = CreateI32SplatConst(loc, rewriter, {1}, -1); + Value scalar_zero = CreateI32SplatConst(loc, rewriter, {}, 0); + Value slice_start = CreateI32SplatConst(loc, rewriter, {1}, 1); auto elem_shape = rewriter->create( loc, RankedTensorType::get({-1}, shape_dtype), input_shape, slice_start, slice_size); auto extended_part = rewriter->create( - loc, resize_op.output_handle()->getType(), elem_shape, size_diff); + loc, resize_op.output_handle().getType(), elem_shape, size_diff); // `ConcatOp` expects non-variant-typed input. Insert a // `TensorListStackOp` here to convert type from variant to non-variant. 
// Note that we are using the same `result_type` for both the @@ -536,8 +535,8 @@ struct ConvertTensorListResize : public ConversionPattern { /*num_elements=*/rewriter->getI32IntegerAttr(-1)); auto concat_op = rewriter->create( loc, result_type, scalar_zero, - ArrayRef({input, stacked_extended_part})); - rewriter->create(loc, ArrayRef({concat_op})); + ArrayRef({input, stacked_extended_part})); + rewriter->create(loc, ArrayRef({concat_op})); } void CreateCondFalseBranch(Location loc, Type shape_dtype, Type result_type, @@ -550,8 +549,8 @@ struct ConvertTensorListResize : public ConversionPattern { Block *block = branch_func.addEntryBlock(); rewriter->setInsertionPointToStart(block); - Value *scalar_zero = CreateI32SplatConst(loc, rewriter, {}, 0); - Value *vector_one = CreateI32SplatConst(loc, rewriter, {1}, 1); + Value scalar_zero = CreateI32SplatConst(loc, rewriter, {}, 0); + Value vector_one = CreateI32SplatConst(loc, rewriter, {1}, 1); auto input = block->getArgument(0); auto size = block->getArgument(3); @@ -566,7 +565,7 @@ struct ConvertTensorListResize : public ConversionPattern { /*start_index=*/scalar_zero, /*size=*/size, /*item_rank=*/partial_position_shape, /*result_type=*/result_type, rewriter); - rewriter->create(loc, ArrayRef({slice_op})); + rewriter->create(loc, ArrayRef({slice_op})); } }; @@ -576,11 +575,11 @@ struct ConvertTensorListGetItem : public ConversionPattern { context) {} PatternMatchResult matchAndRewrite( - Operation *operation, ArrayRef operands, + Operation *operation, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { auto op = llvm::cast(operation); - Value *input = operands[0]; - Value *index = operands[1]; + Value input = operands[0]; + Value index = operands[1]; rewriter.replaceOpWithNewOp( operation, op.getType(), input, index, rewriter.getBoolAttr(true)); return matchSuccess(); @@ -593,11 +592,11 @@ struct ConvertTensorListLength : public ConversionPattern { context) {} PatternMatchResult matchAndRewrite( - Operation *operation, ArrayRef operands, + Operation *operation, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { auto op = llvm::cast(operation); Location loc = op.getLoc(); - Value *input_handle = operands[0]; + Value input_handle = operands[0]; BoolAttr true_attr = rewriter.getBoolAttr(true); auto shape = rewriter.create(loc, input_handle, @@ -615,19 +614,19 @@ struct ConvertTensorListStack : public ConversionPattern { context) {} PatternMatchResult matchAndRewrite( - Operation *operation, ArrayRef operands, + Operation *operation, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { auto op = llvm::cast(operation); Location loc = op.getLoc(); - Value *input = operands[0]; - Value *element_shape = operands[1]; + Value input = operands[0]; + Value element_shape = operands[1]; // If the `element_shape` is a known constant (which is defined when calling // `tensor_list_stack`) and also valid (not scalar), we rewrite this op to a // trivial Reshape op (that doesn't actually change the input's shape) and // also populate the shape info to the op result. The shape of the // tensorlist is inferred from `num_elements` and `element_shape`. 
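
The check that follows only fires this rewrite when `element_shape` is genuinely known. The same gating decision, sketched in plain C++ with std::optional standing in for "matched a compile-time constant" (types here are stand-ins, not MLIR's):

    #include <cstdint>
    #include <optional>
    #include <vector>

    // Mirrors the rank-0 / non-constant bail-out in ConvertTensorListStack:
    // the shape must be a constant and must carry at least one dimension.
    bool CanRewriteToReshape(
        const std::optional<std::vector<int64_t>>& element_shape) {
      if (!element_shape.has_value()) return false;  // not a known constant
      if (element_shape->empty()) return false;      // scalar: no dims to use
      return true;
    }
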
- auto ranked_type = element_shape->getType().dyn_cast(); + auto ranked_type = element_shape.getType().dyn_cast(); DenseIntElementsAttr dense_elem_attr; if ((ranked_type && ranked_type.getRank() == 0) || !matchPattern(element_shape, m_Constant(&dense_elem_attr))) { @@ -655,11 +654,11 @@ struct ConvertIdentity : public ConversionPattern { : ConversionPattern(TF::IdentityOp::getOperationName(), 1, context) {} PatternMatchResult matchAndRewrite( - Operation *operation, ArrayRef operands, + Operation *operation, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { auto op = llvm::cast(operation); - Value *input = operands[0]; - rewriter.replaceOpWithNewOp(op, input->getType(), operands, + Value input = operands[0]; + rewriter.replaceOpWithNewOp(op, input.getType(), operands, op.getAttrs()); return matchSuccess(); } @@ -687,7 +686,7 @@ static LogicalResult UpdateFunctionTypes(TF::WhileOp op) { Type arg_type = func_type.getInput(i); if (getElementTypeOrSelf(arg_type).isa()) { arg_type = UnrankedTensorType::get( - getElementTypeOrSelf(op.getOperand(i)->getType())); + getElementTypeOrSelf(op.getOperand(i).getType())); } updated_argument_types.push_back(arg_type); } @@ -703,7 +702,7 @@ static LogicalResult UpdateFunctionTypes(TF::WhileOp op) { // from the corresponding input operand. This is correct because while // body's inputs and results have the same type. result_type = UnrankedTensorType::get( - getElementTypeOrSelf(op.getOperand(i)->getType())); + getElementTypeOrSelf(op.getOperand(i).getType())); } updated_result_types.push_back(result_type); } @@ -717,7 +716,7 @@ static LogicalResult UpdateFunctionTypes(TF::WhileOp op) { // Change the argument type for the first block. Block &body_first_bb = func.front(); for (int i = 0; i < body_first_bb.getNumArguments(); ++i) { - body_first_bb.getArgument(i)->setType(updated_argument_types[i]); + body_first_bb.getArgument(i).setType(updated_argument_types[i]); } } return success(); @@ -728,19 +727,19 @@ struct ConvertWhile : public ConversionPattern { : ConversionPattern(TF::WhileOp::getOperationName(), 1, context) {} PatternMatchResult matchAndRewrite( - Operation *operation, ArrayRef operands, + Operation *operation, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { auto op = llvm::cast(operation); llvm::SmallVector result_types; result_types.reserve(op.getNumOperands()); for (int i = 0, e = operands.size(); i != e; ++i) { - Type result_ty = op.getResult(i)->getType(); + Type result_ty = op.getResult(i).getType(); // If we notice the result type is a DT_VARIANT, we change the // corresponding result type to unranked tensor type. if (getElementTypeOrSelf(result_ty).isa()) { - Type element_ty = getElementTypeOrSelf(operands[i]->getType()); + Type element_ty = getElementTypeOrSelf(operands[i].getType()); result_ty = UnrankedTensorType::get(element_ty); } result_types.push_back(result_ty); diff --git a/tensorflow/compiler/mlir/lite/transforms/optimize.cc b/tensorflow/compiler/mlir/lite/transforms/optimize.cc index 1313bae97a1..69b767068ff 100644 --- a/tensorflow/compiler/mlir/lite/transforms/optimize.cc +++ b/tensorflow/compiler/mlir/lite/transforms/optimize.cc @@ -30,14 +30,14 @@ limitations under the License. 
#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Casting.h" -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Matchers.h" // TF:local_config_mlir -#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Support/Functional.h" // TF:local_config_mlir -#include "mlir/Support/LLVM.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Matchers.h" // TF:llvm-project +#include "mlir/IR/PatternMatch.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Support/Functional.h" // TF:llvm-project +#include "mlir/Support/LLVM.h" // TF:llvm-project #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" #include "tensorflow/compiler/mlir/lite/transforms/passes.h" #include "tensorflow/compiler/mlir/lite/utils/validators.h" @@ -50,16 +50,16 @@ namespace TFL { // The actual Optimize Pass. namespace { -bool L2NormalizeReduceAxis(Value *sq_op, DenseElementsAttr axis) { - if (sq_op->getType().cast().getRank() - 1 == +bool L2NormalizeReduceAxis(Value sq_op, DenseElementsAttr axis) { + if (sq_op.getType().cast().getRank() - 1 == *axis.getValues().begin() || *axis.getValues().begin() == -1) { return true; } - if (sq_op->getType().cast().getRank() != axis.getNumElements()) { + if (sq_op.getType().cast().getRank() != axis.getNumElements()) { return false; } - auto shape = sq_op->getType().cast(); + auto shape = sq_op.getType().cast(); SmallVector elems{axis.getValues().begin(), axis.getValues().end()}; for (int i = 0; i < shape.getRank(); ++i) { @@ -142,8 +142,8 @@ ElementsAttr ExpandTo4DForDepthwiseConv(Attribute a) { // Returns shape of a ranked tensor. // Precondition: output_val's is ranked tensor. -DenseElementsAttr GetShape(Value *output_val) { - auto output_type = output_val->getType().cast(); +DenseElementsAttr GetShape(Value output_val) { + auto output_type = output_val.getType().cast(); auto shape_vector = output_type.getShape(); std::vector shape(shape_vector.size()); for (int i = 0; i < shape_vector.size(); ++i) { @@ -152,7 +152,7 @@ DenseElementsAttr GetShape(Value *output_val) { return mlir::DenseElementsAttr::get( RankedTensorType::get( {static_cast(shape.size())}, - mlir::IntegerType::get(32, output_val->getContext())), + mlir::IntegerType::get(32, output_val.getContext())), llvm::makeArrayRef(shape)); } @@ -167,19 +167,19 @@ struct FuseFullyConnectedAndAdd : public OpRewritePattern { PatternRewriter &rewriter) const override { // Add. DenseElementsAttr added_value; - Value *constant_val = add_op.rhs(); + Value constant_val = add_op.rhs(); if (!matchPattern(constant_val, m_Constant(&added_value))) return matchFailure(); // Fully Connected. 
auto fc_op = - dyn_cast_or_null(add_op.lhs()->getDefiningOp()); + dyn_cast_or_null(add_op.lhs().getDefiningOp()); if (!fc_op) return matchFailure(); - Value *filter = fc_op.filter(); - Value *bias = fc_op.bias(); + Value filter = fc_op.filter(); + Value bias = fc_op.bias(); ElementsAttr bias_value; - const bool is_none_bias = bias->getType().isa(); + const bool is_none_bias = bias.getType().isa(); if (!is_none_bias && !matchPattern(bias, m_Constant(&bias_value))) return matchFailure(); if (fc_op.fused_activation_function() != "NONE") return matchFailure(); @@ -213,7 +213,7 @@ struct FuseFullyConnectedAndRelu : public OpRewritePattern { PatternMatchResult matchAndRewrite(TFL::ReluOp relu_op, PatternRewriter &rewriter) const override { - Operation *input = relu_op.getOperand()->getDefiningOp(); + Operation *input = relu_op.getOperand().getDefiningOp(); if (!isa_and_nonnull(input)) return matchFailure(); auto fully_connected_op = cast(input); if (fully_connected_op.fused_activation_function() != "NONE") @@ -242,18 +242,18 @@ struct FuseFullyConnectedAndMul : public OpRewritePattern { PatternRewriter &rewriter) const override { // Mul. DenseElementsAttr cst; - Value *constant_val = mul_op.rhs(); + Value constant_val = mul_op.rhs(); if (!matchPattern(constant_val, m_Constant(&cst))) return matchFailure(); // Fully Connected. auto fc_op = - dyn_cast_or_null(mul_op.lhs()->getDefiningOp()); + dyn_cast_or_null(mul_op.lhs().getDefiningOp()); if (!fc_op) return matchFailure(); - Value *filter = fc_op.filter(); - Value *bias = fc_op.bias(); + Value filter = fc_op.filter(); + Value bias = fc_op.bias(); ElementsAttr cst_tmp; if (!matchPattern(filter, m_Constant(&cst_tmp))) return matchFailure(); - if (!bias->getType().isa() && + if (!bias.getType().isa() && !matchPattern(bias, m_Constant(&cst_tmp))) return matchFailure(); if (fc_op.fused_activation_function().equals("None")) return matchFailure(); @@ -261,8 +261,8 @@ struct FuseFullyConnectedAndMul : public OpRewritePattern { // Broadcast the constant operand of Mul if it isn't compatible to the // filter input. We only support broadcasting the operand along the depth // dimension, when the operand's depth is 1. - Value *new_const_val = constant_val; - if (!IsBroadcastableElementsAttrAndType(cst.getType(), filter->getType())) { + Value new_const_val = constant_val; + if (!IsBroadcastableElementsAttrAndType(cst.getType(), filter.getType())) { auto original_shape = cst.getType().getShape(); llvm::SmallVector normalized_shape(original_shape.begin(), original_shape.end()); @@ -270,7 +270,7 @@ struct FuseFullyConnectedAndMul : public OpRewritePattern { auto new_cst = cst.reshape(RankedTensorType::get( normalized_shape, cst.getType().getElementType())); Type new_type = new_cst.getType(); - if (!IsBroadcastableElementsAttrAndType(new_type, filter->getType())) { + if (!IsBroadcastableElementsAttrAndType(new_type, filter.getType())) { return matchFailure(); } auto new_op = @@ -285,7 +285,7 @@ struct FuseFullyConnectedAndMul : public OpRewritePattern { auto new_filter = rewriter.create(loc, filter, new_const_val).z(); // If bias isn't None, it needs to be multiplied as well. - if (!bias->getType().isa()) { + if (!bias.getType().isa()) { bias = rewriter.create(loc, bias, constant_val).z(); } @@ -311,7 +311,7 @@ struct FuseBinaryOpToFollowingAffineOp : public OpRewritePattern { PatternMatchResult matchAndRewrite(AffineOpType fc_op, PatternRewriter &rewriter) const override { // Binary op. 
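
Before the binary-op case continues below, the algebra behind all of these fusions is worth spelling out: fc(x; w, b) followed by "+ c" equals fc(x; w, b + c), so the add disappears once its constant folds into the bias. A self-contained sketch of that fold with plain vectors instead of MLIR attributes (the mul fusion above is the same move, scaling filter and bias by c):

    #include <vector>

    // New bias is constant-folded offline, once; the elementwise add op is
    // then dropped from the graph.
    std::vector<float> FoldAddIntoBias(std::vector<float> bias, float added) {
      for (float& b : bias) b += added;
      return bias;
    }
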
- Operation *binary_op = fc_op.input()->getDefiningOp(); + Operation *binary_op = fc_op.input().getDefiningOp(); if (!binary_op || binary_op->getNumOperands() != 2) return this->matchFailure(); // We only handle the cases the RHS is a scalar. @@ -325,20 +325,20 @@ struct FuseBinaryOpToFollowingAffineOp : public OpRewritePattern { APFloat cst_value = *cst.float_value_begin(); // Affine op. - Value *filter = fc_op.filter(); - Value *bias = fc_op.bias(); + Value filter = fc_op.filter(); + Value bias = fc_op.bias(); DenseFPElementsAttr filter_cst, bias_cst; if (!matchPattern(filter, m_Constant(&filter_cst))) { // The filter maybe quantized, then we should set it to the real constant. - auto dq = llvm::dyn_cast_or_null(filter->getDefiningOp()); + auto dq = llvm::dyn_cast_or_null(filter.getDefiningOp()); if (!dq) return this->matchFailure(); - auto q = llvm::dyn_cast_or_null(dq.input()->getDefiningOp()); + auto q = llvm::dyn_cast_or_null(dq.input().getDefiningOp()); if (!q || !matchPattern(q.input(), m_Constant(&filter_cst))) { return this->matchFailure(); } filter = q.input(); } - if (!bias->getType().isa() && + if (!bias.getType().isa() && !matchPattern(bias, m_Constant(&bias_cst))) return this->matchFailure(); ShapedType filter_type = filter_cst.getType(); @@ -362,7 +362,7 @@ struct FuseBinaryOpToFollowingAffineOp : public OpRewritePattern { // The new bias should be a 1-D tensor with length equals to the bias // dimension of the weight. SmallVector new_bias_values; - if (bias->getType().isa()) { // none bias, a list of zeros + if (bias.getType().isa()) { // none bias, a list of zeros new_bias_values.resize(bias_size, APFloat(0.0)); } else if (bias_cst.getNumElements() == 1) { // scalar bias, broadcast it new_bias_values.resize(bias_size, *bias_cst.float_value_begin()); @@ -401,12 +401,12 @@ struct FuseBinaryOpToFollowingAffineOp : public OpRewritePattern { // We recreate the constant op in case it is shared by the other ops. This // might increase the model size. auto new_filter_op = rewriter.create( - fc_op.getLoc(), filter->getType(), new_filter); + fc_op.getLoc(), filter.getType(), new_filter); fc_op.setOperand(0, binary_op->getOperand(0)); if (fc_op.filter() != filter) { // This filter goes through quantize and dequantize ops. Then we just // need to update the weight to the quantize op. - filter->replaceAllUsesWith(new_filter_op); + filter.replaceAllUsesWith(new_filter_op); } else { // This filter doesn't go through quantize and dequantize ops, Then // we update the weight of the affine op directly. diff --git a/tensorflow/compiler/mlir/lite/transforms/optimize_functional_ops.cc b/tensorflow/compiler/mlir/lite/transforms/optimize_functional_ops.cc index 59dc271400e..6761abf36ec 100644 --- a/tensorflow/compiler/mlir/lite/transforms/optimize_functional_ops.cc +++ b/tensorflow/compiler/mlir/lite/transforms/optimize_functional_ops.cc @@ -17,15 +17,15 @@ limitations under the License. 
#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Support/Casting.h" -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/BlockAndValueMapping.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/TypeUtilities.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/BlockAndValueMapping.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/IR/PatternMatch.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/TypeUtilities.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" namespace mlir { @@ -98,13 +98,13 @@ class FoldIfOp : public OpRewritePattern { for (int i = 0, e = func.getNumArguments(); i != e; ++i) mapper.map(func.getArgument(i), op.getOperand(i + 1)); - llvm::SmallVector updated_results; + llvm::SmallVector updated_results; for (auto& op_to_inline : func.getBody().front()) { // If this is a terminator, identify the values to use to replace the // original If op. if (op_to_inline.isKnownTerminator()) { updated_results.reserve(op_to_inline.getNumOperands()); - for (Value* operand : op_to_inline.getOperands()) + for (Value operand : op_to_inline.getOperands()) updated_results.push_back(mapper.lookup(operand)); break; } diff --git a/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td b/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td index a91f6de1971..c0e49bfb49a 100644 --- a/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td +++ b/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td @@ -18,6 +18,7 @@ limitations under the License. include "mlir/IR/OpBase.td" include "mlir/Dialect/StandardOps/Ops.td" include "tensorflow/compiler/mlir/lite/ir/tfl_ops.td" +include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td" def F32ElementsAttr : ElementsAttrBase< CPred<"$_self.cast().getType().getElementType().isF32()">, "float constant tensor">; @@ -53,13 +54,15 @@ foreach actFnPair = [[TFL_ReluOp, TFL_AF_Relu], [TFL_Relu1Op, TFL_AF_Relu1]] in defm : FuseActFnIntoConvOpPat; +// Checks if the value has only one user. +def HasOneUse : Constraint>; // If we see a binary op (add, sub) op adding a constant value to a convolution // op with constant bias, we can fuse the binary op into the convolution op by // constant folding the bias and the binary op's constant operand. The following // pattern restricts to float constant values for now. 
multiclass FuseBinaryOpToPrecedingAffine { - def : Pat<(binaryOp (TFL_Conv2DOp $input, $filter, + def : Pat<(binaryOp (TFL_Conv2DOp:$output $input, $filter, (ConstantOp F32ElementsAttr:$bias), $h_factor, $w_factor, TFL_AF_None, $padding, $stride_h, $stride_w), @@ -68,8 +71,9 @@ multiclass FuseBinaryOpToPrecedingAffine { (binaryOp (ConstantOp $bias), (ConstantOp $value), TFL_AF_None), $h_factor, $w_factor, $act_fn, - $padding, $stride_h, $stride_w)>; - def : Pat<(binaryOp (TFL_DepthwiseConv2DOp $input, $filter, + $padding, $stride_h, $stride_w), + [(HasOneUse $output)]>; + def : Pat<(binaryOp (TFL_DepthwiseConv2DOp:$output $input, $filter, (ConstantOp F32ElementsAttr:$bias), $h_factor, $w_factor, TFL_AF_None, $padding, $stride_h, $stride_w, @@ -81,7 +85,8 @@ multiclass FuseBinaryOpToPrecedingAffine { TFL_AF_None), $h_factor, $w_factor, $act_fn, $padding, $stride_h, $stride_w, - $multiplier)>; + $multiplier), + [(HasOneUse $output)]>; } foreach binaryOp = [TFL_AddOp, TFL_SubOp] in defm : FuseBinaryOpToPrecedingAffine; @@ -101,7 +106,7 @@ def ExpandTo4DForDepthwiseConv: NativeCodeCall< // The following pattern restricts to float constant values for now. multiclass FuseMulOrDivWithConv2dOrDepthwiseConv2d { - def : Pat<(BinaryOp (TFL_DepthwiseConv2DOp $input, + def : Pat<(BinaryOp (TFL_DepthwiseConv2DOp:$output $input, (ConstantOp F32ElementsAttr:$filter), (ConstantOp F32ElementsAttr:$bias), $h_factor, $w_factor, TFL_AF_None, @@ -119,8 +124,9 @@ multiclass FuseMulOrDivWithConv2dOrDepthwiseConv2d { $h_factor, $w_factor, $act_fn, $padding, $stride_h, $stride_w, $multiplier), - [(CanFuseConvOrDepthwiseConv<"true"> $filter, $value)]>; - def : Pat<(BinaryOp (TFL_Conv2DOp $input, + [(CanFuseConvOrDepthwiseConv<"true"> $filter, $value), + (HasOneUse $output)]>; + def : Pat<(BinaryOp (TFL_Conv2DOp:$conv_output $input, (ConstantOp F32ElementsAttr:$filter), (ConstantOp F32ElementsAttr:$bias), $h_factor, $w_factor, TFL_AF_None, @@ -135,7 +141,8 @@ multiclass FuseMulOrDivWithConv2dOrDepthwiseConv2d { TFL_AF_None), $h_factor, $w_factor, $act_fn, $padding, $stride_h, $stride_w), - [(CanFuseConvOrDepthwiseConv<"false"> $filter, $value)]>; + [(CanFuseConvOrDepthwiseConv<"false"> $filter, $value), + (HasOneUse $conv_output)]>; } foreach BinaryOp = [TFL_DivOp, TFL_MulOp] in @@ -154,7 +161,7 @@ def EqualOperands : Constraint>; // Checks if the operand has rank == n class OperandHasRank : Constraint< - CPred<"$0->getType().cast().getRank() == " # n>>; + CPred<"$0.getType().cast().getRank() == " # n>>; // Matching HardSwish def : Pat< @@ -249,7 +256,7 @@ foreach L2NormalizePairs = [[TFL_MulOp, TFL_RsqrtOp], [TFL_DivOp, TFL_SqrtOp]] in defm : L2NormalizePatterns; def AreBroadcastableTypes : ConstraintgetType(), $1->getType())">>; + "TFL::IsBroadcastableElementsAttrAndType($0.getType(), $1.getType())">>; // Pattern for skipping Tile if it is mainly for broadcasting and the // Op is already supporting broadcasting. @@ -307,3 +314,7 @@ multiclass FusedBinaryActivationFuncOpPat { foreach BinaryOps = [TFL_AddOp, TFL_DivOp, TFL_MulOp, TFL_SubOp] in defm : FusedBinaryActivationFuncOpPat; + +// The constant folding in this pass might produce constant in the tf dialect. +// This rule is to legalize these constant to the tfl dialect. 
+def : Pat<(TF_ConstOp ElementsAttr:$value), (TFL_ConstOp $value)>; diff --git a/tensorflow/compiler/mlir/lite/transforms/post_quantize.cc b/tensorflow/compiler/mlir/lite/transforms/post_quantize.cc index 4f56de26864..267901f69f3 100644 --- a/tensorflow/compiler/mlir/lite/transforms/post_quantize.cc +++ b/tensorflow/compiler/mlir/lite/transforms/post_quantize.cc @@ -16,8 +16,8 @@ limitations under the License. // This transformation pass applies some clean up steps after quantization. #include "llvm/Support/Casting.h" -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" #include "tensorflow/compiler/mlir/lite/quantization/quantization_utils.h" #include "tensorflow/compiler/mlir/lite/transforms/passes.h" @@ -67,33 +67,33 @@ void RemoveQuantizationAdaptorOps(FuncOp func) { // In each iteration, a new argument is appended to the end of the list // and the current argument is erased, so here we always process the first // argument in the list. - auto* arg = bb.getArgument(0); + auto arg = bb.getArgument(0); auto remove_quantize_op = [&](QuantizeOp quantize_op) { auto quantize_output = quantize_op.output(); - auto quantize_type = quantize_output->getType(); + auto quantize_type = quantize_output.getType(); input_types.push_back(quantize_type); - auto* new_arg = bb.addArgument(quantize_type); - quantize_output->replaceAllUsesWith(new_arg); + auto new_arg = bb.addArgument(quantize_type); + quantize_output.replaceAllUsesWith(new_arg); quantize_op.erase(); - arg->dropAllUses(); + arg.dropAllUses(); bb.eraseArgument(0); }; // This is looking for a pattern: arg -> tfl.quantize - if (arg->hasOneUse() && llvm::isa(*arg->user_begin())) { - auto quantize_op = llvm::cast(*arg->user_begin()); + if (arg.hasOneUse() && llvm::isa(*arg.user_begin())) { + auto quantize_op = llvm::cast(*arg.user_begin()); remove_quantize_op(quantize_op); continue; } // Make a copy of current argument and append it to the end of the list if // the pattern isn't found. 
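
The argument loop above leans on a small trick: always process argument 0, append its replacement at the back, and erase the front, so one pass rebuilds the whole argument list in order. The same rotation on a plain std::deque (illustrative only, not the MLIR block-argument API):

    #include <cctype>
    #include <deque>
    #include <iostream>

    int main() {
      std::deque<char> args = {'a', 'b', 'c'};
      for (size_t n = args.size(); n > 0; --n) {
        // "Rewrite" the front argument, append the result, drop the original
        // -- mirroring bb.addArgument(...) followed by bb.eraseArgument(0).
        args.push_back(static_cast<char>(std::toupper(args.front())));
        args.pop_front();
      }
      for (char c : args) std::cout << c;  // ABC: same order, all updated
      std::cout << '\n';
    }
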
- Type arg_type = arg->getType(); + Type arg_type = arg.getType(); input_types.push_back(arg_type); - auto* new_arg = bb.addArgument(arg_type); - arg->replaceAllUsesWith(new_arg); - arg->dropAllUses(); + auto new_arg = bb.addArgument(arg_type); + arg.replaceAllUsesWith(new_arg); + arg.dropAllUses(); bb.eraseArgument(0); } @@ -102,16 +102,16 @@ void RemoveQuantizationAdaptorOps(FuncOp func) { llvm::SmallVector output_types; output_types.reserve(num_return_operands); for (int i = 0; i != num_return_operands; ++i) { - auto* returned_value = terminator->getOperand(i); - Operation* returned_op = returned_value->getDefiningOp(); + auto returned_value = terminator->getOperand(i); + Operation* returned_op = returned_value.getDefiningOp(); if (returned_op && llvm::isa(returned_op)) { auto dequantize_op = llvm::cast(returned_op); - Value* dequantized_result = dequantize_op.input(); - output_types.push_back(dequantized_result->getType()); + Value dequantized_result = dequantize_op.input(); + output_types.push_back(dequantized_result.getType()); terminator->setOperand(i, dequantized_result); returned_op->erase(); } else { - output_types.push_back(returned_value->getType()); + output_types.push_back(returned_value.getType()); } } auto new_func_type = builder.getFunctionType(input_types, output_types); diff --git a/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc b/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc index c299064a136..a1fb78ac38b 100644 --- a/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc +++ b/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc @@ -22,19 +22,19 @@ limitations under the License. #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/Identifier.h" // TF:local_config_mlir -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/SymbolTable.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/Identifier.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/SymbolTable.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" #include "tensorflow/compiler/mlir/lite/transforms/passes.h" #include "tensorflow/compiler/mlir/lite/utils/lstm_utils.h" @@ -53,8 +53,8 @@ class ConvertEmbeddedLookupFunc { void RewriteFunc() { func_.setAttr(kTFImplements, 
StringAttr::get("embedding_lookup", func_.getContext())); - Value* lookup = func_.getArgument(1); - Value* value = func_.getArgument(0); + Value lookup = func_.getArgument(1); + Value value = func_.getArgument(0); auto output_type = func_.getType().getResult(0); OpBuilder builder(func_.getBody()); diff --git a/tensorflow/compiler/mlir/lite/transforms/prepare_patterns.td b/tensorflow/compiler/mlir/lite/transforms/prepare_patterns.td index a2dc2e93746..40bf54935c4 100644 --- a/tensorflow/compiler/mlir/lite/transforms/prepare_patterns.td +++ b/tensorflow/compiler/mlir/lite/transforms/prepare_patterns.td @@ -135,10 +135,10 @@ def : Pat<(TF_ReshapeOp // Casts result type of $1 to a quantized type by using the quantization // parameters from the type in $0. class UpdateShapeWithAxis : NativeCodeCall< - "CastQuantizedTypeAttrFromExpressedType($_builder, $0, $1->getType(), " # i # ")">; + "CastQuantizedTypeAttrFromExpressedType($_builder, $0, $1.getType(), " # i # ")">; class UsedBy : Constraint< - CPred<"llvm::isa(*$0->getUsers().begin())">>; + CPred<"llvm::isa(*$0.getUsers().begin())">>; // When the op is passing-through, the output types of the quantized ops need // to be updated as well. Since the quantize op manages its own type by the diff --git a/tensorflow/compiler/mlir/lite/transforms/prepare_quantize.cc b/tensorflow/compiler/mlir/lite/transforms/prepare_quantize.cc index 5d139f83933..0b6da59ca6e 100644 --- a/tensorflow/compiler/mlir/lite/transforms/prepare_quantize.cc +++ b/tensorflow/compiler/mlir/lite/transforms/prepare_quantize.cc @@ -21,10 +21,10 @@ limitations under the License. #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/CommandLine.h" -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir -#include "mlir/IR/Value.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/PatternMatch.h" // TF:llvm-project +#include "mlir/IR/Value.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" #include "tensorflow/compiler/mlir/lite/quantization/quantization_config.h" #include "tensorflow/compiler/mlir/lite/quantization/quantization_utils.h" @@ -139,7 +139,7 @@ bool PrepareQuantizePass::SetInputNodesQuantizationParams(FuncOp func) { BoolAttr narrow_range = builder.getBoolAttr(false); auto add_quantize_op = [&](Location loc, Type input_type, Block* block, - Block::iterator insertion_point, Value* arg, + Block::iterator insertion_point, Value arg, int i) { if (auto shaped = input_type.dyn_cast()) { if (shaped.getElementType().isa()) { @@ -153,16 +153,16 @@ bool PrepareQuantizePass::SetInputNodesQuantizationParams(FuncOp func) { params); auto dq_op = builder.create(loc, input_type, q_op.output()); - arg->replaceAllUsesWith(dq_op.output()); + arg.replaceAllUsesWith(dq_op.output()); q_op.setOperand(arg); } } }; for (int i = 0, e = func.getNumArguments(); i != e; ++i) { - BlockArgument* arg = func.getArgument(i); - auto* arg_block = arg->getOwner(); - add_quantize_op(arg->getLoc(), arg->getType(), arg_block, + BlockArgument arg = func.getArgument(i); + auto* arg_block = arg.getOwner(); + add_quantize_op(arg.getLoc(), arg.getType(), arg_block, std::next(arg_block->begin(), i), arg, i); } diff --git a/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc b/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc index 
45248ddc01c..ab4d30e1170 100644 --- a/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc +++ b/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc @@ -38,17 +38,17 @@ limitations under the License. #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" -#include "mlir/Analysis/LoopAnalysis.h" // TF:local_config_mlir -#include "mlir/Dialect/QuantOps/FakeQuantSupport.h" // TF:local_config_mlir -#include "mlir/Dialect/QuantOps/UniformSupport.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Support/Functional.h" // TF:local_config_mlir -#include "mlir/Support/LLVM.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir +#include "mlir/Analysis/LoopAnalysis.h" // TF:llvm-project +#include "mlir/Dialect/QuantOps/FakeQuantSupport.h" // TF:llvm-project +#include "mlir/Dialect/QuantOps/UniformSupport.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/PatternMatch.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Support/Functional.h" // TF:llvm-project +#include "mlir/Support/LLVM.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" #include "tensorflow/compiler/mlir/lite/quantization/quantization_utils.h" #include "tensorflow/compiler/mlir/lite/transforms/passes.h" @@ -115,17 +115,17 @@ struct InsertTFLQuantOpsAfterTFFakeQuantOp PatternRewriter &rewriter) const override { // We don't want to insert quantize/dequantize if the quantize op exists. auto res = tf_op.outputs(); - if (!res->hasOneUse() || isa(*res->user_begin())) + if (!res.hasOneUse() || isa(*res.user_begin())) return this->matchFailure(); // Extract the min/max constant values from the operands. We also consider // a special case that there are tf.Identity ops between the min/max // constants and the tf.FakeQuantWithMinMaxVarsOp. - Value *min = tf_op.min(), *max = tf_op.max(); + Value min = tf_op.min(), max = tf_op.max(); DenseFPElementsAttr min_value, max_value; - if (auto id1 = dyn_cast_or_null(min->getDefiningOp())) + if (auto id1 = dyn_cast_or_null(min.getDefiningOp())) min = id1.input(); - if (auto id2 = dyn_cast_or_null(max->getDefiningOp())) + if (auto id2 = dyn_cast_or_null(max.getDefiningOp())) max = id2.input(); if (!matchPattern(min, m_Constant(&min_value))) return this->matchFailure(); if (!matchPattern(max, m_Constant(&max_value))) return this->matchFailure(); @@ -133,7 +133,7 @@ struct InsertTFLQuantOpsAfterTFFakeQuantOp int quant_dim = -1; if (PerAxis) { // This is a special case that the quant_dim is the last dimensions. - quant_dim = res->getType().template cast().getRank() - 1; + quant_dim = res.getType().template cast().getRank() - 1; } // Use the min/max from the operands and the num_bits and narrow_range // attribute to create the quantization parameter for the new quantize op. 
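
Turning a (min, max) range plus num_bits/narrow_range into a quantization parameter boils down to a scale and a zero point. A standalone sketch of the standard asymmetric formula (the textbook computation, not the quant:: helper this pass actually calls; the unsigned qmin/qmax convention here is an assumption):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    struct QuantParams {
      double scale;
      int32_t zero_point;
    };

    // Map [rmin, rmax] onto the integer range [qmin, qmax] implied by
    // num_bits; narrow_range drops the lowest bucket (qmin = 1).
    QuantParams ParamsFromMinMax(double rmin, double rmax, int num_bits,
                                 bool narrow_range) {
      const int32_t qmin = narrow_range ? 1 : 0;
      const int32_t qmax = (1 << num_bits) - 1;
      rmin = std::min(rmin, 0.0);  // representable range must contain zero
      rmax = std::max(rmax, 0.0);
      double scale = (rmax - rmin) / (qmax - qmin);
      if (scale == 0.0) scale = 1.0;  // degenerate all-zero range
      const int32_t zero_point =
          static_cast<int32_t>(std::lround(qmin - rmin / scale));
      return {scale, zero_point};
    }
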
@@ -150,12 +150,12 @@ struct InsertTFLQuantOpsAfterTFFakeQuantOp // Finally, use the quantization parameter to create the quantize and // dequantize ops, and insert them between the tf.FakeQuantWithMinMaxVarsOp // and its users. - Value *value = tf_op.outputs(); + Value value = tf_op.outputs(); auto quantize = rewriter.create( tf_op.getLoc(), qtype.getValue(), value, qtype); auto dequantize = rewriter.create( tf_op.getLoc(), res_type, quantize.output()); - value->replaceAllUsesWith(dequantize); + value.replaceAllUsesWith(dequantize); quantize.getOperation()->replaceUsesOfWith(dequantize, value); return this->matchSuccess(); @@ -177,8 +177,8 @@ using PreparePerChannelFakeQuant = // // TFL::[op] createTFLOp(ConvertTFConvOpMatchState *state, // PatternRewriter &rewriter, Location loc, -// Type result_type, Value *input, -// Value *filter, Value *bias) const; +// Type result_type, Value input, +// Value filter, Value bias) const; // // And also the following method for getting the dimension for bias tensor: // @@ -240,7 +240,7 @@ struct ConvertTFConvOp : public RewritePattern { // that we can extract info from the shape (e.g., for constructing bias // tensor, for setting depth_multiplier attribute, etc.). auto filter_type = - tf_op.filter()->getType().template dyn_cast(); + tf_op.filter().getType().template dyn_cast(); if (filter_type && filter_type.getRank() == 4) return matchSuccess(std::move(state)); @@ -262,7 +262,7 @@ struct ConvertTFConvOp : public RewritePattern { // Get a splat zero tensor with the expected dimension for the bias tensor auto filter = tf_op.filter(); - auto filter_type = filter->getType().template cast(); + auto filter_type = filter.getType().template cast(); auto elem_type = filter_type.getElementType(); auto bias_dim = static_cast(this)->getBiasDim( filter_type.getShape()); @@ -294,8 +294,8 @@ class ConvertTFConv2D : public ConvertTFConvOp { TFL::Conv2DOp createTFLOp(ConvertTFConvOpMatchState *state, PatternRewriter &rewriter, Location loc, - Type result_type, Value *input, Value *filter, - Value *bias) const { + Type result_type, Value input, Value filter, + Value bias) const { filter = legalizeFilter(rewriter, loc, filter); return rewriter.create( loc, result_type, input, filter, bias, @@ -312,8 +312,8 @@ class ConvertTFConv2D : public ConvertTFConvOp { // format HWIO to TFLite Conv2D op filter data format OHWI and return Value // for the converted filter. Requires that filter is verified by the match // method that it is a 4-D RankedTensorType. - Value *legalizeFilter(PatternRewriter &rewriter, Location loc, - Value *filter) const { + Value legalizeFilter(PatternRewriter &rewriter, Location loc, + Value filter) const { // Create a constant op for HWIO to OHWI transpose permutation. SmallVector perm = {3, 0, 1, 2}; auto perm_type = RankedTensorType::get({static_cast(perm.size())}, @@ -323,7 +323,7 @@ class ConvertTFConv2D : public ConvertTFConvOp { auto perm_op = rewriter.create(loc, perm_type, perm_attr); // Create tensor type for the transpose result. 
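
The filter legalization above is pure index shuffling: with perm = {3, 0, 1, 2}, a HWIO shape is read out as OHWI. A sketch of just the shape permutation the transpose performs (plain C++):

    #include <array>
    #include <cstdint>

    // e.g. {kH, kW, in, out} -> {out, kH, kW, in}
    std::array<int64_t, 4> PermuteHWIOToOHWI(
        const std::array<int64_t, 4>& hwio) {
      const std::array<int, 4> perm = {3, 0, 1, 2};
      std::array<int64_t, 4> ohwi;
      for (int i = 0; i < 4; ++i) ohwi[i] = hwio[perm[i]];
      return ohwi;
    }
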
- auto filter_type = filter->getType().cast(); + auto filter_type = filter.getType().cast(); auto result_shape = functional::map( [filter_type](int64_t dim) { return filter_type.getDimSize(dim); }, perm); @@ -349,14 +349,14 @@ class ConvertTFDepthwiseConv2dNative TFL::DepthwiseConv2DOp createTFLOp(ConvertTFConvOpMatchState *state, PatternRewriter &rewriter, Location loc, - Type result_type, Value *input, - Value *filter, Value *bias) const { + Type result_type, Value input, + Value filter, Value bias) const { // Compared to tfl.conv_2d, tfl.depthwise_conv_2d has an additional // 'depth_multiplier' attribute. However, tf.DepthwiseConv2dNative does not // have a corresponding 'depth_multiplier' attribute; the multiplier is the // fourth dimension in the 4-D filter tensor. We query the multiplier from // tf.DepthwiseConv2dNative and set it as the attribute value accordingly. - auto multiplier = filter->getType().cast().getDimSize(3); + auto multiplier = filter.getType().cast().getDimSize(3); filter = legalizeFilter(rewriter, loc, filter); return rewriter.create( @@ -378,9 +378,9 @@ class ConvertTFDepthwiseConv2dNative /// filter data format is [1, filter_height, filter_width, out_channels]. /// Requires that filter is verified by the match method that it is a 4-D /// RankedTensorType. - Value *legalizeFilter(PatternRewriter &rewriter, Location loc, - Value *filter) const { - auto filter_type = filter->getType().cast(); + Value legalizeFilter(PatternRewriter &rewriter, Location loc, + Value filter) const { + auto filter_type = filter.getType().cast(); auto filterShape = filter_type.getShape(); SmallVector result_shape = {1, filterShape[0], filterShape[1], filterShape[2] * filterShape[3]}; @@ -430,13 +430,13 @@ struct ConvertTFStridedSlice : public RewritePattern { if (new_axis_mask == 0) return matchFailure(); // Insert a new reshape op. - Value *original_input = strided_slice_op.input(); + Value original_input = strided_slice_op.input(); RankedTensorType original_input_type = - original_input->getType().cast(); + original_input.getType().cast(); const ArrayRef &original_input_shape = original_input_type.getShape(); RankedTensorType begin_type = - strided_slice_op.begin()->getType().cast(); + strided_slice_op.begin().getType().cast(); const int dim_size = begin_type.getShape()[0]; SmallVector new_shape; int mask = 1; diff --git a/tensorflow/compiler/mlir/lite/transforms/quantize.cc b/tensorflow/compiler/mlir/lite/transforms/quantize.cc index e47e97a60e8..6842621db70 100644 --- a/tensorflow/compiler/mlir/lite/transforms/quantize.cc +++ b/tensorflow/compiler/mlir/lite/transforms/quantize.cc @@ -19,17 +19,17 @@ limitations under the License. 
#include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" -#include "mlir/Dialect/QuantOps/QuantTypes.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Matchers.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/OperationSupport.h" // TF:local_config_mlir -#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Support/Functional.h" // TF:local_config_mlir +#include "mlir/Dialect/QuantOps/QuantTypes.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Matchers.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/OperationSupport.h" // TF:llvm-project +#include "mlir/IR/PatternMatch.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Support/Functional.h" // TF:llvm-project #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" #include "tensorflow/compiler/mlir/lite/quantization/quantization_utils.h" #include "tensorflow/compiler/mlir/lite/transforms/passes.h" diff --git a/tensorflow/compiler/mlir/lite/transforms/split_merged_operands.cc b/tensorflow/compiler/mlir/lite/transforms/split_merged_operands.cc index 123d1f86319..17125bffd85 100644 --- a/tensorflow/compiler/mlir/lite/transforms/split_merged_operands.cc +++ b/tensorflow/compiler/mlir/lite/transforms/split_merged_operands.cc @@ -18,24 +18,24 @@ limitations under the License. 
#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/Casting.h" -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Block.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Matchers.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/OperationSupport.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/SymbolTable.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir -#include "mlir/IR/Value.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassRegistry.h" // TF:local_config_mlir -#include "mlir/Support/LLVM.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Block.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Matchers.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/OperationSupport.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/SymbolTable.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project +#include "mlir/IR/Value.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassRegistry.h" // TF:llvm-project +#include "mlir/Support/LLVM.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" #include "tensorflow/compiler/mlir/lite/utils/stateful_ops_utils.h" @@ -71,19 +71,19 @@ struct SplitMergedOperandsPass : public FunctionPass { }; LogicalResult DuplicateValueIfNeeded(Operation* op, - llvm::DenseSet* values, + llvm::DenseSet* values, OpBuilder* builder) { std::vector stateful_operands_index; if (!IsStatefulOp(op, &stateful_operands_index)) return success(); for (int index : stateful_operands_index) { - Value* operand = op->getOperand(index); + Value operand = op->getOperand(index); auto inserted_value = values->insert(operand).second; if (inserted_value) continue; // We can only clone the constant op at this point. 
   // Since all ops have been legalized to TFLite ops, we only care about
   // ConstOp, QConstOp, or an MLIR constant op.
-  Operation* input_op = operand->getDefiningOp();
+  Operation* input_op = operand.getDefiningOp();
   if (input_op == nullptr) return failure();

   Attribute attr;
@@ -102,7 +102,7 @@ LogicalResult DuplicateValueIfNeeded(Operation* op,
 }

 void SplitMergedOperandsPass::runOnFunction() {
-  llvm::DenseSet<Value*> stateful_values;
+  llvm::DenseSet<Value> stateful_values;
   auto func = getFunction();
   OpBuilder builder(func);
   for (auto& bb : func.getBody()) {
diff --git a/tensorflow/compiler/mlir/lite/transforms/trim_functions_tf.cc b/tensorflow/compiler/mlir/lite/transforms/trim_functions_tf.cc
index 87b96de762a..5a7397ed9c9 100644
--- a/tensorflow/compiler/mlir/lite/transforms/trim_functions_tf.cc
+++ b/tensorflow/compiler/mlir/lite/transforms/trim_functions_tf.cc
@@ -20,13 +20,13 @@ limitations under the License.
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/Support/CommandLine.h"
-#include "mlir/Dialect/StandardOps/Ops.h"  // TF:local_config_mlir
-#include "mlir/IR/Builders.h"  // TF:local_config_mlir
-#include "mlir/IR/Identifier.h"  // TF:local_config_mlir
-#include "mlir/IR/Location.h"  // TF:local_config_mlir
-#include "mlir/IR/MLIRContext.h"  // TF:local_config_mlir
-#include "mlir/IR/SymbolTable.h"  // TF:local_config_mlir
-#include "mlir/Pass/Pass.h"  // TF:local_config_mlir
+#include "mlir/Dialect/StandardOps/Ops.h"  // TF:llvm-project
+#include "mlir/IR/Builders.h"  // TF:llvm-project
+#include "mlir/IR/Identifier.h"  // TF:llvm-project
+#include "mlir/IR/Location.h"  // TF:llvm-project
+#include "mlir/IR/MLIRContext.h"  // TF:llvm-project
+#include "mlir/IR/SymbolTable.h"  // TF:llvm-project
+#include "mlir/Pass/Pass.h"  // TF:llvm-project
 #include "tensorflow/compiler/mlir/lite/transforms/passes.h"

 // The cmd line flag to specify the whitelist of functions. Rest are trimmed
diff --git a/tensorflow/compiler/mlir/lite/transforms/unroll_batch_matmul.cc b/tensorflow/compiler/mlir/lite/transforms/unroll_batch_matmul.cc
index 61d33a5233e..e245bb801b7 100644
--- a/tensorflow/compiler/mlir/lite/transforms/unroll_batch_matmul.cc
+++ b/tensorflow/compiler/mlir/lite/transforms/unroll_batch_matmul.cc
@@ -24,17 +24,17 @@ limitations under the License.
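Editor's note: the split_merged_operands pass above dedups stateful operands by cloning the defining constant whenever the same value already feeds another stateful operand. A minimal Python sketch of that dedup-and-clone loop (the class and helper names are illustrative stand-ins, not the TFLite API):

```
from dataclasses import dataclass, field

@dataclass
class Op:
    name: str
    operands: list
    stateful_indices: list = field(default_factory=list)

def split_merged_operands(op, seen, clone_constant):
    # For every stateful operand, make sure the value is not shared with
    # another stateful operand; clone the constant producer on a repeat.
    for index in op.stateful_indices:
        operand = op.operands[index]
        if id(operand) not in seen:
            seen.add(id(operand))
            continue
        cloned = clone_constant(operand)
        if cloned is None:
            return False  # mirrors failure(): only constants can be cloned
        op.operands[index] = cloned
    return True

const = object()  # one constant value feeding two stateful operands
op = Op("tfl.svdf", [const, const], stateful_indices=[0, 1])
assert split_merged_operands(op, set(), lambda v: object())
assert op.operands[0] is not op.operands[1]  # each user has its own copy
```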
#include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" -#include "mlir/Analysis/LoopAnalysis.h" // TF:local_config_mlir -#include "mlir/Dialect/QuantOps/FakeQuantSupport.h" // TF:local_config_mlir -#include "mlir/Dialect/QuantOps/UniformSupport.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/OpImplementation.h" // TF:local_config_mlir -#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Support/Functional.h" // TF:local_config_mlir -#include "mlir/Support/LLVM.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir +#include "mlir/Analysis/LoopAnalysis.h" // TF:llvm-project +#include "mlir/Dialect/QuantOps/FakeQuantSupport.h" // TF:llvm-project +#include "mlir/Dialect/QuantOps/UniformSupport.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/OpImplementation.h" // TF:llvm-project +#include "mlir/IR/PatternMatch.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Support/Functional.h" // TF:llvm-project +#include "mlir/Support/LLVM.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" #include "tensorflow/compiler/mlir/lite/quantization/quantization_utils.h" #include "tensorflow/compiler/mlir/lite/transforms/passes.h" @@ -67,7 +67,7 @@ void UnrollBatchMatMulPass::runOnFunction() { template TF::ReshapeOp ConvertTFBatchMatMulOp::createReshapeOp( - Value* value, ArrayRef shape, Type element_type, Location loc, + Value value, ArrayRef shape, Type element_type, Location loc, PatternRewriter& rewriter) { int64_t shape_rank = shape.size(); auto shape_spec_type = @@ -81,9 +81,9 @@ TF::ReshapeOp ConvertTFBatchMatMulOp::createReshapeOp( } template -std::vector ConvertTFBatchMatMulOp::sliceInput( - Value* value, int batch_size, Location loc, PatternRewriter& rewriter) { - RankedTensorType tensorType = value->getType().cast(); +std::vector ConvertTFBatchMatMulOp::sliceInput( + Value value, int batch_size, Location loc, PatternRewriter& rewriter) { + RankedTensorType tensorType = value.getType().cast(); Type element_type = tensorType.getElementType(); int rank = tensorType.getShape().size(); @@ -96,7 +96,7 @@ std::vector ConvertTFBatchMatMulOp::sliceInput( SmallVector slice_size = {1, num_rows, num_cols}; - std::vector sliced; + std::vector sliced; Type int64_type = rewriter.getIntegerType(64); Type slice_result_type = RankedTensorType::get(slice_size, element_type); @@ -126,8 +126,8 @@ std::vector ConvertTFBatchMatMulOp::sliceInput( template TF::TransposeOp ConvertTFBatchMatMulOp::createTransposeOp( - Value* value, Location loc, PatternRewriter& rewriter) { - auto value_type = value->getType().cast(); + Value value, Location loc, PatternRewriter& rewriter) { + auto value_type = value.getType().cast(); auto shape = value_type.getShape(); int dims = shape.size(); @@ -158,13 +158,12 @@ TF::TransposeOp ConvertTFBatchMatMulOp::createTransposeOp( template TF::PackOp ConvertTFBatchMatMulOp::createMatMulOps( - const std::vector& sliced_lhs, - const std::vector& sliced_rhs, const tensorflow::MatMulBCast& bcast, - int rows, int cols, Type element_type, Location loc, - PatternRewriter& rewriter) { + const std::vector& sliced_lhs, const 
std::vector& sliced_rhs, + const tensorflow::MatMulBCast& bcast, int rows, int cols, Type element_type, + Location loc, PatternRewriter& rewriter) { auto matmul_type = RankedTensorType::get({rows, cols}, element_type); - std::vector matmuls; + std::vector matmuls; for (int batch_idx = 0; batch_idx < bcast.output_batch_size(); ++batch_idx) { int lhs_batch_idx, rhs_batch_idx; if (bcast.IsBroadcastingRequired()) { @@ -195,20 +194,20 @@ TF::PackOp ConvertTFBatchMatMulOp::createMatMulOps( template PatternMatchResult ConvertTFBatchMatMulOp::matchAndRewrite( BatchMatMulOpType op, PatternRewriter& rewriter) const { - Value* input_lhs = op.x(); - Value* input_rhs = op.y(); + Value input_lhs = op.x(); + Value input_rhs = op.y(); - if (!input_lhs->getType().isa()) { + if (!input_lhs.getType().isa()) { // LHS must be a ranked tensor type return this->matchFailure(); } - if (!input_rhs->getType().isa()) { + if (!input_rhs.getType().isa()) { // RHS must be a ranked tensor type return this->matchFailure(); } - auto lhs_type = input_lhs->getType().cast(); - auto rhs_type = input_rhs->getType().cast(); + auto lhs_type = input_lhs.getType().cast(); + auto rhs_type = input_rhs.getType().cast(); auto element_type = lhs_type.getElementType(); @@ -234,7 +233,7 @@ PatternMatchResult ConvertTFBatchMatMulOp::matchAndRewrite( if (op.adj_x()) { input_lhs = createTransposeOp(input_lhs, loc, rewriter); - lhs_type = input_lhs->getType().cast(); + lhs_type = input_lhs.getType().cast(); lhs_shape = lhs_type.getShape(); } @@ -242,7 +241,7 @@ PatternMatchResult ConvertTFBatchMatMulOp::matchAndRewrite( if (op.adj_y()) { input_rhs = createTransposeOp(input_rhs, loc, rewriter); - rhs_type = input_rhs->getType().cast(); + rhs_type = input_rhs.getType().cast(); rhs_shape = rhs_type.getShape(); } @@ -276,9 +275,9 @@ PatternMatchResult ConvertTFBatchMatMulOp::matchAndRewrite( } // Compute slices for each batch in the LHS and RHS. - std::vector sliced_lhs = + std::vector sliced_lhs = sliceInput(input_lhs, bcast.x_batch_size(), loc, rewriter); - std::vector sliced_rhs = + std::vector sliced_rhs = sliceInput(input_rhs, bcast.y_batch_size(), loc, rewriter); // Compute (single batch) MatMul for each output batch. The MatMul outputs diff --git a/tensorflow/compiler/mlir/lite/transforms/unroll_batch_matmul.h b/tensorflow/compiler/mlir/lite/transforms/unroll_batch_matmul.h index 19b75963ebf..4aae05bde60 100644 --- a/tensorflow/compiler/mlir/lite/transforms/unroll_batch_matmul.h +++ b/tensorflow/compiler/mlir/lite/transforms/unroll_batch_matmul.h @@ -17,9 +17,9 @@ limitations under the License. 
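Editor's note: the transformation implemented above by ConvertTFBatchMatMulOp (sliceInput cuts each batch out of the LHS and RHS, one rank-2 MatMul runs per output batch, and createMatMulOps packs the results) can be expressed in a few lines of NumPy. This is only a sketch of the semantics for same-batch-size 3-D inputs, without the adj_x/adj_y transposes or broadcast handling:

```
import numpy as np

def unrolled_batch_matmul(lhs, rhs):
    """Equivalent of BatchMatMul for 3-D inputs with matching batch dims."""
    assert lhs.ndim == rhs.ndim == 3 and lhs.shape[0] == rhs.shape[0]
    matmuls = []
    for batch_idx in range(lhs.shape[0]):
        # sliceInput: one (rows x depth) and one (depth x cols) slice per batch.
        matmuls.append(lhs[batch_idx] @ rhs[batch_idx])
    # createMatMulOps ends with a Pack of the per-batch results.
    return np.stack(matmuls)

lhs = np.random.rand(4, 2, 3)
rhs = np.random.rand(4, 3, 5)
assert np.allclose(unrolled_batch_matmul(lhs, rhs), lhs @ rhs)
```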
 #define TENSORFLOW_COMPILER_MLIR_LITE_TRANSFORMS_UNROLL_BATCH_MATMUL_H_

 #include "llvm/ADT/ArrayRef.h"
-#include "mlir/IR/Location.h"  // TF:local_config_mlir
-#include "mlir/IR/PatternMatch.h"  // TF:local_config_mlir
-#include "mlir/IR/TypeUtilities.h"  // TF:local_config_mlir
+#include "mlir/IR/Location.h"  // TF:llvm-project
+#include "mlir/IR/PatternMatch.h"  // TF:llvm-project
+#include "mlir/IR/TypeUtilities.h"  // TF:llvm-project
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h"
 #include "tensorflow/core/util/matmul_bcast.h"

@@ -33,19 +33,18 @@ template <typename BatchMatMulOpType>
 class ConvertTFBatchMatMulOp : public OpRewritePattern<BatchMatMulOpType> {
   using OpRewritePattern<BatchMatMulOpType>::OpRewritePattern;

-  static TF::ReshapeOp createReshapeOp(Value* value, ArrayRef<int64_t> shape,
+  static TF::ReshapeOp createReshapeOp(Value value, ArrayRef<int64_t> shape,
                                        Type element_type, Location loc,
                                        PatternRewriter& rewriter);

-  static std::vector<Value*> sliceInput(Value* value, int batch_size,
-                                        Location loc,
-                                        PatternRewriter& rewriter);
+  static std::vector<Value> sliceInput(Value value, int batch_size,
+                                       Location loc, PatternRewriter& rewriter);

-  static TF::TransposeOp createTransposeOp(Value* value, Location loc,
+  static TF::TransposeOp createTransposeOp(Value value, Location loc,
                                            PatternRewriter& rewriter);

-  static TF::PackOp createMatMulOps(const std::vector<Value*>& sliced_lhs,
-                                    const std::vector<Value*>& sliced_rhs,
+  static TF::PackOp createMatMulOps(const std::vector<Value>& sliced_lhs,
+                                    const std::vector<Value>& sliced_rhs,
                                     const tensorflow::MatMulBCast& bcast,
                                     int rows, int cols, Type element_type,
                                     Location loc, PatternRewriter& rewriter);
diff --git a/tensorflow/compiler/mlir/lite/utils/attribute_utils.cc b/tensorflow/compiler/mlir/lite/utils/attribute_utils.cc
index 33da9929711..a9cc483df76 100644
--- a/tensorflow/compiler/mlir/lite/utils/attribute_utils.cc
+++ b/tensorflow/compiler/mlir/lite/utils/attribute_utils.cc
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/

-#include "mlir/IR/Attributes.h"  // TF:local_config_mlir
-#include "mlir/IR/StandardTypes.h"  // TF:local_config_mlir
+#include "mlir/IR/Attributes.h"  // TF:llvm-project
+#include "mlir/IR/StandardTypes.h"  // TF:llvm-project

 namespace mlir {
 namespace TFL {
diff --git a/tensorflow/compiler/mlir/lite/utils/attribute_utils.h b/tensorflow/compiler/mlir/lite/utils/attribute_utils.h
index 263a0a8dc93..5a11690d15f 100644
--- a/tensorflow/compiler/mlir/lite/utils/attribute_utils.h
+++ b/tensorflow/compiler/mlir/lite/utils/attribute_utils.h
@@ -19,7 +19,7 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_MLIR_LITE_UTILS_ATTRIBUTE_UTILS_H_
 #define TENSORFLOW_COMPILER_MLIR_LITE_UTILS_ATTRIBUTE_UTILS_H_

-#include "mlir/Dialect/StandardOps/Ops.h"  // TF:local_config_mlir
+#include "mlir/Dialect/StandardOps/Ops.h"  // TF:llvm-project

 namespace mlir {
 namespace TFL {
diff --git a/tensorflow/compiler/mlir/lite/utils/convert_type.cc b/tensorflow/compiler/mlir/lite/utils/convert_type.cc
index 167749d5f2e..85bd6a18764 100644
--- a/tensorflow/compiler/mlir/lite/utils/convert_type.cc
+++ b/tensorflow/compiler/mlir/lite/utils/convert_type.cc
@@ -15,15 +15,20 @@ limitations under the License.
#include "tensorflow/compiler/mlir/lite/utils/convert_type.h" -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" +#include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/lite/schema/schema_generated.h" namespace tflite { +using xla::StatusOr; + +namespace errors = tensorflow::errors; + mlir::Type ConvertElementType(tflite::TensorType type, mlir::Builder builder) { switch (type) { case tflite::TensorType_FLOAT32: @@ -74,4 +79,31 @@ tensorflow::DataType TflTypeToTfType(tflite::TensorType type) { } } +StatusOr TfTypeToTflType(tensorflow::DataType type) { + switch (type) { + case tensorflow::DT_BOOL: + return tflite::TensorType_BOOL; + case tensorflow::DT_COMPLEX64: + return tflite::TensorType_COMPLEX64; + case tensorflow::DT_HALF: + return tflite::TensorType_FLOAT16; + case tensorflow::DT_FLOAT: + return tflite::TensorType_FLOAT32; + case tensorflow::DT_INT8: + return tflite::TensorType_INT8; + case tensorflow::DT_INT16: + return tflite::TensorType_INT16; + case tensorflow::DT_INT32: + return tflite::TensorType_INT32; + case tensorflow::DT_INT64: + return tflite::TensorType_INT64; + case tensorflow::DT_STRING: + return tflite::TensorType_STRING; + case tensorflow::DT_UINT8: + return tflite::TensorType_UINT8; + default: + return errors::InvalidArgument("unsupported tensor data type", type); + } +} + } // namespace tflite diff --git a/tensorflow/compiler/mlir/lite/utils/convert_type.h b/tensorflow/compiler/mlir/lite/utils/convert_type.h index ff4ccb325a8..90600c423bd 100644 --- a/tensorflow/compiler/mlir/lite/utils/convert_type.h +++ b/tensorflow/compiler/mlir/lite/utils/convert_type.h @@ -16,11 +16,13 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_MLIR_LITE_UTILS_CONVERT_TYPE_H_ #define TENSORFLOW_COMPILER_MLIR_LITE_UTILS_CONVERT_TYPE_H_ -#include "mlir/IR/Types.h" // TF:local_config_mlir +#include "mlir/IR/Types.h" // TF:llvm-project +#include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/lite/schema/schema_generated.h" namespace mlir { + class Builder; } @@ -32,5 +34,8 @@ mlir::Type ConvertElementType(tflite::TensorType type, mlir::Builder builder); // Tensorflow type tensorflow::DataType TflTypeToTfType(tflite::TensorType type); +// Convert the Tensorflow scalar type to the corresponding TFLite type +xla::StatusOr TfTypeToTflType(tensorflow::DataType type); + } // namespace tflite #endif // TENSORFLOW_COMPILER_MLIR_LITE_UTILS_CONVERT_TYPE_H_ diff --git a/tensorflow/compiler/mlir/lite/utils/lstm_utils.cc b/tensorflow/compiler/mlir/lite/utils/lstm_utils.cc index 92a8ad49bf4..132448c58bd 100644 --- a/tensorflow/compiler/mlir/lite/utils/lstm_utils.cc +++ b/tensorflow/compiler/mlir/lite/utils/lstm_utils.cc @@ -20,20 +20,20 @@ limitations under the License. 
#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Casting.h" -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/Identifier.h" // TF:local_config_mlir -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/OpDefinition.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir -#include "mlir/IR/Value.h" // TF:local_config_mlir -#include "mlir/Support/LLVM.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/Identifier.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/OpDefinition.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project +#include "mlir/IR/Value.h" // TF:llvm-project +#include "mlir/Support/LLVM.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" @@ -42,35 +42,35 @@ namespace TFL { namespace { -Value* CreateI32SplatConst(OpBuilder* builder, ArrayRef shape, - int32_t val, mlir::Location location) { +Value CreateI32SplatConst(OpBuilder* builder, ArrayRef shape, + int32_t val, mlir::Location location) { auto type = RankedTensorType::get(shape, builder->getIntegerType(32)); auto attr = DenseElementsAttr::get(type, val); return builder->create(location, type, attr); } -Value* CreateF32SplatConst(OpBuilder* builder, ArrayRef shape, - float val, mlir::Location location) { +Value CreateF32SplatConst(OpBuilder* builder, ArrayRef shape, + float val, mlir::Location location) { auto type = RankedTensorType::get(shape, builder->getF32Type()); auto attr = DenseElementsAttr::get(type, val); return builder->create(location, type, attr); } -Value* CreateI64DenseConst(OpBuilder* builder, ArrayRef shape, - ArrayRef values, mlir::Location location) { +Value CreateI64DenseConst(OpBuilder* builder, ArrayRef shape, + ArrayRef values, mlir::Location location) { auto type = RankedTensorType::get(static_cast(shape.size()), builder->getIntegerType(64)); auto attr = DenseElementsAttr::get(type, values); return builder->create(location, type, attr); } -Value* CreateNoneValue(OpBuilder* builder, mlir::Location location) { +Value CreateNoneValue(OpBuilder* builder, mlir::Location location) { return builder->create(location, builder->getNoneType(), builder->getUnitAttr()); } -Value* Transpose2D(OpBuilder* builder, Value* value_to_transpose, - RankedTensorType type, mlir::Location location) { +Value Transpose2D(OpBuilder* builder, Value value_to_transpose, + RankedTensorType type, mlir::Location location) { // Create a constant op for transpose permutation. 
SmallVector perm = {1, 0}; auto perm_op = CreateI64DenseConst(builder, perm, perm, location); @@ -87,16 +87,16 @@ Value* Transpose2D(OpBuilder* builder, Value* value_to_transpose, value_to_transpose, perm_op); } -ArrayRef GetRankedTensorShape(Value* value) { - return value->getType().cast().getShape(); +ArrayRef GetRankedTensorShape(Value value) { + return value.getType().cast().getShape(); } -Value* SliceRankedTensor(OpBuilder* builder, Value* input, - ArrayRef begin_shape, - ArrayRef begin_values, - ArrayRef size_shape, - ArrayRef size_values, - mlir::Location location) { +Value SliceRankedTensor(OpBuilder* builder, Value input, + ArrayRef begin_shape, + ArrayRef begin_values, + ArrayRef size_shape, + ArrayRef size_values, + mlir::Location location) { // If the size of the tensor to be sliced from the input overflows // the input tensor's dimensions, return 0-valued tensor of the requested // shape. @@ -120,7 +120,7 @@ Value* SliceRankedTensor(OpBuilder* builder, Value* input, location, RankedTensorType::get( size_values, - input->getType().cast().getElementType()), + input.getType().cast().getElementType()), input, slice_i2c_begin, slice_i2c_size); } @@ -327,8 +327,7 @@ void ConvertLSTMCellSimpleToFusedLSTM::UpdateFuncSignature() { SmallVector output_shape{1, -1}; auto input_types = fused_func_op_.getType().getInputs(); auto output_type = mlir::RankedTensorType::get( - output_shape, - input_->getType().cast().getElementType()); + output_shape, input_.getType().cast().getElementType()); fused_func_op_.setType(mlir::FunctionType::get(input_types, output_type, fused_func_op_.getContext())); } @@ -351,8 +350,7 @@ LogicalResult ConvertLSTMCellSimpleToFusedLSTM::RewriteFunc() { // Create the fused LSTM op. SmallVector output_shape = {1, n_output_}; auto result_type = mlir::RankedTensorType::get( - output_shape, - input_->getType().cast().getElementType()); + output_shape, input_.getType().cast().getElementType()); lstm_ = builder_.create( fused_func_op_.getLoc(), result_type, input_, input2input_, input2forget_, input2cell_, input2output_, rec2input_, rec2forget_, rec2cell_, @@ -371,7 +369,7 @@ LogicalResult ConvertLSTMCellSimpleToFusedLSTM::RewriteFunc() { SmallVector func_output_shape = {1, -1}; auto func_result_type = mlir::RankedTensorType::get( func_output_shape, - input_->getType().cast().getElementType()); + input_.getType().cast().getElementType()); auto tensor_cast = builder_.create( fused_func_op_.getLoc(), lstm_.getResult(), func_result_type); @@ -426,7 +424,7 @@ LogicalResult ConvertLSTMCellSimpleToFusedLSTM::Initialize() { bias_ = fused_func_op_.getArgument(2); weight_ = fused_func_op_.getArgument(1); - weight_type_ = weight_->getType().cast(); + weight_type_ = weight_.getType().cast(); if (weight_type_.getRank() != 2) { return fused_func_op_.emitError() << "The weight tensor was not of rank 2"; @@ -440,7 +438,7 @@ LogicalResult ConvertLSTMCellSimpleToFusedLSTM::Initialize() { n_cell_ = weight_type_.getDimSize(1) / num_gates_; projection_ = fused_func_op_.getArgument(3); - projection_type_ = projection_->getType().cast(); + projection_type_ = projection_.getType().cast(); if (projection_type_.getRank() != 2) { n_output_ = n_cell_; } else { @@ -467,8 +465,7 @@ LogicalResult ConvertLayerNormalizedLSTMCellSimpleToFusedLSTM::Initialize() { } layer_norm_scale_ = fused_func_op_.getArgument(4); - layer_norm_scale_type_ = - layer_norm_scale_->getType().cast(); + layer_norm_scale_type_ = layer_norm_scale_.getType().cast(); if (layer_norm_scale_type_.getRank() != 1) { return 
fused_func_op_.emitError() << "The layer_norm_scale tensor was not of rank 1"; diff --git a/tensorflow/compiler/mlir/lite/utils/lstm_utils.h b/tensorflow/compiler/mlir/lite/utils/lstm_utils.h index 235d4387faf..f6a2991ca4c 100644 --- a/tensorflow/compiler/mlir/lite/utils/lstm_utils.h +++ b/tensorflow/compiler/mlir/lite/utils/lstm_utils.h @@ -20,12 +20,12 @@ limitations under the License. #define TENSORFLOW_COMPILER_MLIR_LITE_UTILS_LSTM_UTILS_H_ #include "llvm/ADT/StringRef.h" -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/Value.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/Value.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" namespace mlir { @@ -102,15 +102,15 @@ class ConvertLSTMCellSimpleToFusedLSTM { // specified state FuncOp fused_func_op_; - Value* input_; - Value* weight_; - Value* bias_; - Value* projection_; + Value input_; + Value weight_; + Value bias_; + Value projection_; bool couple_input_forget_gates_; // internal state - Value* weight_transposed_; - Value* projection_transposed_; + Value weight_transposed_; + Value projection_transposed_; RankedTensorType weight_type_; RankedTensorType projection_type_; int num_gates_; @@ -121,40 +121,40 @@ class ConvertLSTMCellSimpleToFusedLSTM { int num_cols_projection_transposed_; // input -> cifg - Value* input2input_; - Value* input2forget_; - Value* input2cell_; - Value* input2output_; + Value input2input_; + Value input2forget_; + Value input2cell_; + Value input2output_; // recurrent -> cifg - Value* rec2input_; - Value* rec2forget_; - Value* rec2cell_; - Value* rec2output_; + Value rec2input_; + Value rec2forget_; + Value rec2cell_; + Value rec2output_; // bias -> cifg - Value* bias2input_; - Value* bias2forget_; - Value* bias2cell_; - Value* bias2output_; + Value bias2input_; + Value bias2forget_; + Value bias2cell_; + Value bias2output_; // projection - Value* proj_weight_; - Value* proj_bias_; + Value proj_weight_; + Value proj_bias_; // state - Value* input_activation_state_; - Value* input_cell_state_; + Value input_activation_state_; + Value input_cell_state_; // layer norm coefficients - Value* input_layer_norm_coefficients_; - Value* forget_layer_norm_coefficients_; - Value* cell_layer_norm_coefficients_; - Value* output_layer_norm_coefficients_; + Value input_layer_norm_coefficients_; + Value forget_layer_norm_coefficients_; + Value cell_layer_norm_coefficients_; + Value output_layer_norm_coefficients_; mlir::TFL::LSTMOp lstm_; - Value* none_; + Value none_; SmallVector bias_slice_shape_; SmallVector bias_size_values_; SmallVector weight_slice_shape_; @@ -199,7 +199,7 @@ class ConvertLayerNormalizedLSTMCellSimpleToFusedLSTM private: // specified state - Value* layer_norm_scale_; + Value layer_norm_scale_; // internal state RankedTensorType layer_norm_scale_type_; diff --git a/tensorflow/compiler/mlir/lite/utils/lstm_utils_test.cc b/tensorflow/compiler/mlir/lite/utils/lstm_utils_test.cc index 798c6db5355..b229206a4e4 100644 --- 
a/tensorflow/compiler/mlir/lite/utils/lstm_utils_test.cc +++ b/tensorflow/compiler/mlir/lite/utils/lstm_utils_test.cc @@ -24,17 +24,17 @@ limitations under the License. #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Casting.h" -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir -#include "mlir/IR/Value.h" // TF:local_config_mlir -#include "mlir/Support/LLVM.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project +#include "mlir/IR/Value.h" // TF:llvm-project +#include "mlir/Support/LLVM.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project #include "tensorflow/core/platform/test.h" namespace mlir { @@ -128,22 +128,20 @@ TEST_F(LstmUtilsTest, ConvertLSTMCellSimple) { auto transpose_op = fused_lstm_func_.getBody().front().begin(); transpose_op++; - EXPECT_EQ(transpose_op->getOperand(0) - ->getType() - .cast() - .getDimSize(0), - 3); - EXPECT_EQ(transpose_op->getOperand(0) - ->getType() - .cast() - .getDimSize(1), - 12); EXPECT_EQ( - transpose_op->getResult(0)->getType().cast().getDimSize( + transpose_op->getOperand(0).getType().cast().getDimSize( + 0), + 3); + EXPECT_EQ( + transpose_op->getOperand(0).getType().cast().getDimSize( + 1), + 12); + EXPECT_EQ( + transpose_op->getResult(0).getType().cast().getDimSize( 0), 12); EXPECT_EQ( - transpose_op->getResult(0)->getType().cast().getDimSize( + transpose_op->getResult(0).getType().cast().getDimSize( 1), 3); @@ -156,12 +154,12 @@ TEST_F(LstmUtilsTest, ConvertLSTMCellSimple) { EXPECT_EQ(it->getNumOperands(), 24); EXPECT_EQ(it->getNumResults(), 1); // cifg = false, so input2input is not None. - EXPECT_FALSE(it->getOperand(1)->getType().isa()); + EXPECT_FALSE(it->getOperand(1).getType().isa()); // input layer norm is None - EXPECT_TRUE(it->getOperand(20)->getType().isa()); + EXPECT_TRUE(it->getOperand(20).getType().isa()); // proj_bias is F32 EXPECT_TRUE(it->getOperand(17) - ->getType() + .getType() .cast() .getElementType() .isF32()); @@ -169,7 +167,7 @@ TEST_F(LstmUtilsTest, ConvertLSTMCellSimple) { // output gate bias is 0 since it is out of bounds of the bias tensor, so // we set its value as a const tensor of specified size and value 0. EXPECT_TRUE( - mlir::cast(it->getOpOperand(15).get()->getDefiningOp()) + mlir::cast(it->getOpOperand(15).get().getDefiningOp()) .getValue() .cast() .getValue(0) @@ -209,7 +207,7 @@ TEST_F(LstmUtilsTest, ConvertLSTMCellSimpleToFusedLSTMCoupleInputForget) { EXPECT_EQ(it->getNumOperands(), 24); EXPECT_EQ(it->getNumResults(), 1); // cifg = true, so input2input is None. 
- EXPECT_TRUE(it->getOperand(1)->getType().isa()); + EXPECT_TRUE(it->getOperand(1).getType().isa()); } TEST_F(LstmUtilsTest, ConvertLayerNormLSTMCellSimpleToFusedLSTM) { @@ -235,15 +233,15 @@ TEST_F(LstmUtilsTest, ConvertLayerNormLSTMCellSimpleToFusedLSTM) { EXPECT_EQ(it->getNumOperands(), 24); EXPECT_EQ(it->getNumResults(), 1); // cifg = false, so input2input is not None. - EXPECT_FALSE(it->getOperand(1)->getType().isa()); + EXPECT_FALSE(it->getOperand(1).getType().isa()); // input layer norm - EXPECT_FALSE(it->getOperand(20)->getType().isa()); + EXPECT_FALSE(it->getOperand(20).getType().isa()); EXPECT_EQ( - it->getOperand(20)->getType().cast().getShape().size(), + it->getOperand(20).getType().cast().getShape().size(), 1); - EXPECT_EQ( - it->getOperand(20)->getType().cast().getDimSize(0), 3); + EXPECT_EQ(it->getOperand(20).getType().cast().getDimSize(0), + 3); EXPECT_EQ(fused_ln_lstm_func_.getType().getNumResults(), 1); auto output_types = fused_ln_lstm_func_.getType().getResults(); diff --git a/tensorflow/compiler/mlir/lite/utils/stateful_ops_utils.cc b/tensorflow/compiler/mlir/lite/utils/stateful_ops_utils.cc index 45b8fc96361..f830f67bc10 100644 --- a/tensorflow/compiler/mlir/lite/utils/stateful_ops_utils.cc +++ b/tensorflow/compiler/mlir/lite/utils/stateful_ops_utils.cc @@ -17,7 +17,7 @@ limitations under the License. #include -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" namespace mlir { diff --git a/tensorflow/compiler/mlir/lite/utils/stateful_ops_utils.h b/tensorflow/compiler/mlir/lite/utils/stateful_ops_utils.h index b1d24284acc..917ae93f6a8 100644 --- a/tensorflow/compiler/mlir/lite/utils/stateful_ops_utils.h +++ b/tensorflow/compiler/mlir/lite/utils/stateful_ops_utils.h @@ -16,7 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_MLIR_LITE_UTILS_STATEFUL_OPS_UTILS_H_ #define TENSORFLOW_COMPILER_MLIR_LITE_UTILS_STATEFUL_OPS_UTILS_H_ -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project namespace mlir { namespace TFL { diff --git a/tensorflow/compiler/mlir/lite/utils/validators.cc b/tensorflow/compiler/mlir/lite/utils/validators.cc index f00f8b489d0..f8e3dd12c8b 100644 --- a/tensorflow/compiler/mlir/lite/utils/validators.cc +++ b/tensorflow/compiler/mlir/lite/utils/validators.cc @@ -15,8 +15,8 @@ limitations under the License. #include "tensorflow/compiler/mlir/lite/utils/validators.h" -#include "mlir/Dialect/Traits.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir +#include "mlir/Dialect/Traits.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project namespace mlir { namespace TFL { diff --git a/tensorflow/compiler/mlir/lite/utils/validators.h b/tensorflow/compiler/mlir/lite/utils/validators.h index 0a5d790a6eb..e1ae4392881 100644 --- a/tensorflow/compiler/mlir/lite/utils/validators.h +++ b/tensorflow/compiler/mlir/lite/utils/validators.h @@ -19,8 +19,8 @@ limitations under the License. 
 #ifndef TENSORFLOW_COMPILER_MLIR_LITE_UTILS_VALIDATORS_H_
 #define TENSORFLOW_COMPILER_MLIR_LITE_UTILS_VALIDATORS_H_

-#include "mlir/Dialect/StandardOps/Ops.h"  // TF:local_config_mlir
-#include "mlir/IR/StandardTypes.h"  // TF:local_config_mlir
+#include "mlir/Dialect/StandardOps/Ops.h"  // TF:llvm-project
+#include "mlir/IR/StandardTypes.h"  // TF:llvm-project

 namespace mlir {
 namespace TFL {
@@ -51,8 +51,8 @@ bool TFIntListIsAllOnes(const ArrayAttr &attr);

 // Returns true iff the given value is a float tensor whose element type
 // is "DT_FLOAT".
-inline bool TFTypeIsFloatTensor(Value *value) {
-  auto tensorType = value->getType().dyn_cast<TensorType>();
+inline bool TFTypeIsFloatTensor(Value value) {
+  auto tensorType = value.getType().dyn_cast<TensorType>();
   if (!tensorType) return false;
   return tensorType.getElementType().isa<FloatType>();
 }
diff --git a/tensorflow/compiler/mlir/op_or_arg_name_mapper.cc b/tensorflow/compiler/mlir/op_or_arg_name_mapper.cc
index 6b8dd7b0c14..fdaddcfb318 100644
--- a/tensorflow/compiler/mlir/op_or_arg_name_mapper.cc
+++ b/tensorflow/compiler/mlir/op_or_arg_name_mapper.cc
@@ -25,9 +25,9 @@ limitations under the License.
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/FormatVariadic.h"
-#include "mlir/IR/Location.h"  // TF:local_config_mlir
-#include "mlir/IR/Operation.h"  // TF:local_config_mlir
-#include "mlir/IR/Value.h"  // TF:local_config_mlir
+#include "mlir/IR/Location.h"  // TF:llvm-project
+#include "mlir/IR/Operation.h"  // TF:llvm-project
+#include "mlir/IR/Value.h"  // TF:llvm-project

 static inline absl::string_view StringRefToView(llvm::StringRef ref) {
   return absl::string_view(ref.data(), ref.size());
@@ -148,18 +148,18 @@ std::string OpOrArgLocNameMapper::GetName(OpOrVal op_or_val) {
     // generated using the op type.
     return op->getName().getStringRef();
   }
-  auto* val = op_or_val.dyn_cast<Value*>();
-  auto name_from_loc = GetNameFromLoc(val->getLoc());
+  auto val = op_or_val.dyn_cast<Value>();
+  auto name_from_loc = GetNameFromLoc(val.getLoc());
   if (!name_from_loc.empty()) return name_from_loc;
   // If the location is none of the expected types, then simply use name
   // generated using the op type. Follow TF convention and append the result
   // index unless 0.
-  if (auto* result = llvm::dyn_cast<mlir::OpResult>(val)) {
-    if (result->getResultNumber() > 0)
+  if (auto result = val.dyn_cast<mlir::OpResult>()) {
+    if (result.getResultNumber() > 0)
       return llvm::formatv("{0}:{1}",
-                           result->getOwner()->getName().getStringRef(),
-                           result->getResultNumber());
-    return result->getOwner()->getName().getStringRef();
+                           result.getOwner()->getName().getStringRef(),
+                           result.getResultNumber());
+    return result.getOwner()->getName().getStringRef();
   }
   return "";
 }
diff --git a/tensorflow/compiler/mlir/op_or_arg_name_mapper.h b/tensorflow/compiler/mlir/op_or_arg_name_mapper.h
index 6517349146e..db83a8dfd7c 100644
--- a/tensorflow/compiler/mlir/op_or_arg_name_mapper.h
+++ b/tensorflow/compiler/mlir/op_or_arg_name_mapper.h
@@ -23,14 +23,14 @@ limitations under the License.
 #include "llvm/ADT/PointerUnion.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
-#include "mlir/IR/Operation.h"  // TF:local_config_mlir
-#include "mlir/IR/Value.h"  // TF:local_config_mlir
+#include "mlir/IR/Operation.h"  // TF:llvm-project
+#include "mlir/IR/Value.h"  // TF:llvm-project

 namespace tensorflow {

 // PointerUnion for operation and value.
 // TODO(jpienaar): Rename the files.
-using OpOrVal = llvm::PointerUnion<mlir::Operation*, mlir::Value*>;
+using OpOrVal = llvm::PointerUnion<mlir::Operation*, mlir::Value>;

 // Mapper from operation or value to name.
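Editor's note: OpOrArgLocNameMapper::GetName above follows the TensorFlow naming convention for op results, the bare op name for result 0 and op_name:index for every later result. A tiny sketch of just that convention (names in the asserts are illustrative):

```
def result_name(op_name, result_number):
    # TF convention: append the result index unless it is 0.
    if result_number > 0:
        return f"{op_name}:{result_number}"
    return op_name

assert result_name("strided_slice", 0) == "strided_slice"
assert result_name("tf.Split", 2) == "tf.Split:2"
```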
class OpOrArgNameMapper { diff --git a/tensorflow/compiler/mlir/python/mlir.i b/tensorflow/compiler/mlir/python/mlir.i index 2ecea47b3d3..b1d53288204 100644 --- a/tensorflow/compiler/mlir/python/mlir.i +++ b/tensorflow/compiler/mlir/python/mlir.i @@ -108,6 +108,45 @@ string ExperimentalConvertSavedModelToMlir( return MlirModuleToString(*module_or.ConsumeValueOrDie(), show_debug_info); } +// Load a SavedModel V1 and return a textual MLIR string corresponding to it. +// +// Args: +// saved_model_path: File path from which to load the SavedModel. +// tags: Tags to identify MetaGraphDef that need to be loaded. +// +// Returns: +// A string of textual MLIR representing the raw imported SavedModel. +string ExperimentalConvertSavedModelV1ToMlir( + const string &saved_model_path, + const string &tags, + bool show_debug_info, + TF_Status* status) { + // Load the saved model into a SavedModelBundle. + + std::unordered_set tag_set + = absl::StrSplit(tags, ',', absl::SkipEmpty()); + + tensorflow::SavedModelBundle bundle; + auto load_status = tensorflow::LoadSavedModel( + {}, {}, + saved_model_path, tag_set, &bundle); + if (!load_status.ok()) { + Set_TF_Status_from_Status(status, load_status); + return "// error"; + } + + // Convert the SavedModelBundle to an MLIR module. + + mlir::MLIRContext context; + auto module_or = ConvertSavedModelV1ToMlir(bundle, &context); + if (!module_or.status().ok()) { + Set_TF_Status_from_Status(status, module_or.status()); + return "// error"; + } + + return MlirModuleToString(*module_or.ConsumeValueOrDie(), show_debug_info); +} + string ExperimentalRunPassPipeline( const string &mlir_txt, @@ -154,6 +193,7 @@ string ExperimentalRunPassPipeline( %unignore tensorflow::swig; %unignore tensorflow::swig::ImportGraphDef; %unignore tensorflow::swig::ExperimentalConvertSavedModelToMlir; +%unignore tensorflow::swig::ExperimentalConvertSavedModelV1ToMlir; %unignore tensorflow::swig::ExperimentalRunPassPipeline; // Wrap this function @@ -167,6 +207,11 @@ static string ExperimentalConvertSavedModelToMlir( const string &exported_names, bool show_debug_info, TF_Status* status); +static string ExperimentalConvertSavedModelV1ToMlir( + const string &saved_model_path, + const string &tags, + bool show_debug_info, + TF_Status* status); static string ExperimentalRunPassPipeline( const string &mlir_txt, const string &pass_pipeline, @@ -188,6 +233,14 @@ def experimental_convert_saved_model_to_mlir(saved_model_path, show_debug_info ).decode('utf-8'); +def experimental_convert_saved_model_v1_to_mlir(saved_model_path, + tags, show_debug_info): + return ExperimentalConvertSavedModelV1ToMlir( + str(saved_model_path).encode('utf-8'), + str(tags).encode('utf-8'), + show_debug_info + ).decode('utf-8'); + def experimental_run_pass_pipeline(mlir_txt, pass_pipeline, show_debug_info): return ExperimentalRunPassPipeline( mlir_txt.encode('utf-8'), diff --git a/tensorflow/compiler/mlir/runlit.site.cfg.py b/tensorflow/compiler/mlir/runlit.site.cfg.py index e14199ed43b..8f36de71c5f 100644 --- a/tensorflow/compiler/mlir/runlit.site.cfg.py +++ b/tensorflow/compiler/mlir/runlit.site.cfg.py @@ -24,10 +24,11 @@ import lit.llvm # file, instead config is injected by lit.py. The structure is common for lit # tests and intended to only persist temporarily (b/136126535). 
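Editor's note: the new ExperimentalConvertSavedModelV1ToMlir entry point above mirrors the existing V2 path: load the bundle for a comma-separated tag set, import it, and return textual MLIR. A usage sketch, assuming the generated wrapper is importable; the module path and pass-pipeline string below are illustrative assumptions, not part of this change:

```
# Assumption: the SWIG module is exposed as tensorflow.python.pywrap_mlir;
# adjust the import to wherever the build places the generated wrapper.
from tensorflow.python import pywrap_mlir

# Tags select the MetaGraphDef to load; multiple tags are comma-separated.
mlir_txt = pywrap_mlir.experimental_convert_saved_model_v1_to_mlir(
    "/tmp/saved_model", "serve", False)

# The raw import can then be run through any pass pipeline before inspection
# (pipeline name here is illustrative).
print(pywrap_mlir.experimental_run_pass_pipeline(mlir_txt, "canonicalize", False))
```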
# pylint: disable=undefined-variable -config.llvm_tools_dir = os.path.join(os.environ['TEST_SRCDIR'], 'llvm') +config.llvm_tools_dir = os.path.join(os.environ['TEST_SRCDIR'], 'llvm-project', + 'llvm') config.mlir_obj_root = os.path.join(os.environ['TEST_SRCDIR']) -config.mlir_tools_dir = os.path.join(os.environ['TEST_SRCDIR'], - 'local_config_mlir') +config.mlir_tools_dir = os.path.join(os.environ['TEST_SRCDIR'], 'llvm-project', + 'mlir') # TODO(jpienaar): Replace with suffices in build rule. config.suffixes = ['.td', '.mlir', '.pbtxt'] diff --git a/tensorflow/compiler/mlir/tensorflow/BUILD b/tensorflow/compiler/mlir/tensorflow/BUILD index 93fad60614b..2888997c7b2 100644 --- a/tensorflow/compiler/mlir/tensorflow/BUILD +++ b/tensorflow/compiler/mlir/tensorflow/BUILD @@ -1,4 +1,4 @@ -load("@local_config_mlir//:tblgen.bzl", "gentbl") +load("//third_party/mlir:tblgen.bzl", "gentbl") load("//tensorflow:tensorflow.bzl", "tf_cc_test", "tf_gen_op_wrapper_py", "tf_native_cc_binary") package( @@ -8,10 +8,10 @@ package( package_group( name = "friends", - includes = ["@local_config_mlir//:subpackages"], + includes = ["//third_party/mlir:subpackages"], packages = [ "//tensorflow/compiler/...", - "//tensorflow/core/tfrt_delegate/...", + "//tensorflow/lite/experimental/tf_runtime/...", "//tensorflow/python/...", ], ) @@ -22,8 +22,8 @@ filegroup( "ir/tf_generated_ops.td", "ir/tf_op_base.td", "ir/tf_ops.td", - "@local_config_mlir//:OpBaseTdFiles", - "@local_config_mlir//:include/mlir/Analysis/CallInterfaces.td", + "@llvm-project//mlir:OpBaseTdFiles", + "@llvm-project//mlir:include/mlir/Analysis/CallInterfaces.td", ], ) @@ -43,7 +43,7 @@ gentbl( "g3doc/tf_ops.md", ), ], - tblgen = "@local_config_mlir//:mlir-tblgen", + tblgen = "@llvm-project//mlir:mlir-tblgen", td_file = "ir/tf_ops.td", td_srcs = [ ":tensorflow_ops_td_files", @@ -66,11 +66,11 @@ gentbl( "g3doc/tf_saved_model.md", ), ], - tblgen = "@local_config_mlir//:mlir-tblgen", + tblgen = "@llvm-project//mlir:mlir-tblgen", td_file = "ir/tf_saved_model_ops.td", td_srcs = [ - "@local_config_mlir//:include/mlir/IR/OpBase.td", - "@local_config_mlir//:include/mlir/Dialect/StandardOps/Ops.td", + "@llvm-project//mlir:include/mlir/IR/OpBase.td", + "@llvm-project//mlir:include/mlir/Dialect/StandardOps/Ops.td", ], ) @@ -90,11 +90,11 @@ gentbl( "g3doc/tf_executor.md", ), ], - tblgen = "@local_config_mlir//:mlir-tblgen", + tblgen = "@llvm-project//mlir:mlir-tblgen", td_file = "ir/tf_executor_ops.td", td_srcs = [ - "@local_config_mlir//:include/mlir/IR/OpBase.td", - "@local_config_mlir//:include/mlir/Dialect/StandardOps/Ops.td", + "@llvm-project//mlir:include/mlir/IR/OpBase.td", + "@llvm-project//mlir:include/mlir/Dialect/StandardOps/Ops.td", ], ) @@ -114,11 +114,11 @@ gentbl( "g3doc/tf_device.md", ), ], - tblgen = "@local_config_mlir//:mlir-tblgen", + tblgen = "@llvm-project//mlir:mlir-tblgen", td_file = "ir/tf_device_ops.td", td_srcs = [ - "@local_config_mlir//:include/mlir/IR/OpBase.td", - "@local_config_mlir//:include/mlir/Dialect/StandardOps/Ops.td", + "@llvm-project//mlir:include/mlir/IR/OpBase.td", + "@llvm-project//mlir:include/mlir/Dialect/StandardOps/Ops.td", ], ) @@ -130,7 +130,7 @@ gentbl( "transforms/generated_canonicalize.inc", ), ], - tblgen = "@local_config_mlir//:mlir-tblgen", + tblgen = "@llvm-project//mlir:mlir-tblgen", td_file = "transforms/canonicalize.td", td_srcs = [ ":tensorflow_ops_td_files", @@ -162,8 +162,8 @@ cc_library( "ir/tf_types.h", "transforms/bridge.h", "transforms/passes.h", - 
"@local_config_mlir//:include/mlir/Analysis/CallInterfaces.h", - "@local_config_mlir//:include/mlir/Transforms/InliningUtils.h", + "@llvm-project//mlir:include/mlir/Analysis/CallInterfaces.h", + "@llvm-project//mlir:include/mlir/Transforms/InliningUtils.h", ], includes = ["include"], deps = [ @@ -177,17 +177,17 @@ cc_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core/platform:logging", - "@llvm//:support", - "@local_config_mlir//:Analysis", - "@local_config_mlir//:CallOpInterfacesIncGen", - "@local_config_mlir//:Dialect", - "@local_config_mlir//:IR", - "@local_config_mlir//:Parser", - "@local_config_mlir//:Pass", - "@local_config_mlir//:StandardOps", - "@local_config_mlir//:Support", - "@local_config_mlir//:TransformUtils", - "@local_config_mlir//:Transforms", + "@llvm-project//llvm:support", + "@llvm-project//mlir:Analysis", + "@llvm-project//mlir:CallOpInterfacesIncGen", + "@llvm-project//mlir:Dialect", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Parser", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Support", + "@llvm-project//mlir:TransformUtils", + "@llvm-project//mlir:Transforms", ], # TODO(jpienaar): Merge in the dialect registration. alwayslink = 1, @@ -201,11 +201,11 @@ gentbl( "transforms/generated_decompose_resource_ops.inc", ), ], - tblgen = "@local_config_mlir//:mlir-tblgen", + tblgen = "@llvm-project//mlir:mlir-tblgen", td_file = "transforms/decompose_resource_ops.td", td_srcs = [ ":tensorflow_ops_td_files", - "@local_config_mlir//:StdOpsTdFiles", + "@llvm-project//mlir:StdOpsTdFiles", ], ) @@ -220,7 +220,7 @@ cc_library( deps = [ ":decompose_resource_ops_inc_gen", ":tensorflow", - "@local_config_mlir//:IR", + "@llvm-project//mlir:IR", ], ) @@ -290,15 +290,15 @@ cc_library( "//tensorflow/core/platform:logging", "//tensorflow/core/protobuf/tpu:compile_metadata_proto_cc", "//tensorflow/core/protobuf/tpu:dynamic_padding_proto_cc", - "@llvm//:support", - "@local_config_mlir//:Analysis", - "@local_config_mlir//:IR", - "@local_config_mlir//:Parser", - "@local_config_mlir//:Pass", - "@local_config_mlir//:StandardOps", - "@local_config_mlir//:Support", - "@local_config_mlir//:TransformUtils", - "@local_config_mlir//:Transforms", + "@llvm-project//llvm:support", + "@llvm-project//mlir:Analysis", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Parser", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Support", + "@llvm-project//mlir:TransformUtils", + "@llvm-project//mlir:Transforms", ], # TODO(jpienaar): Merge in the dialect registration. 
alwayslink = 1, @@ -311,8 +311,8 @@ cc_library( ], deps = [ ":lower_tf_lib", - "@local_config_mlir//:IR", - "@local_config_mlir//:Pass", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", ], alwayslink = 1, ) @@ -323,7 +323,7 @@ cc_library( srcs = ["ir/dialect_registration.cc"], deps = [ ":tensorflow", - "@local_config_mlir//:IR", + "@llvm-project//mlir:IR", ], alwayslink = 1, ) @@ -348,15 +348,18 @@ cc_library( ":tensorflow", ":tensorflow_passes", "//tensorflow/cc/saved_model:bundle_v2", + "//tensorflow/cc/saved_model:loader_lite", "//tensorflow/compiler/jit:shape_inference_helpers", "//tensorflow/compiler/mlir:op_or_arg_name_mapper", "//tensorflow/compiler/tf2xla:functionalize_control_flow", "//tensorflow/compiler/xla:status_macros", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", "//tensorflow/core:graph", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", + "//tensorflow/core/grappler/utils:transitive_fanin", "//tensorflow/core/platform:types", "//tensorflow/stream_executor/lib", "@com_google_absl//absl/algorithm:container", @@ -365,12 +368,12 @@ cc_library( "@com_google_absl//absl/container:inlined_vector", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", - "@llvm//:support", - "@local_config_mlir//:IR", - "@local_config_mlir//:Pass", - "@local_config_mlir//:StandardDialectRegistration", - "@local_config_mlir//:StandardOps", - "@local_config_mlir//:Support", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:StandardDialectRegistration", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Support", ], ) @@ -387,7 +390,7 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "@com_google_absl//absl/strings", - "@llvm//:support", + "@llvm-project//llvm:support", ], ) @@ -417,11 +420,11 @@ cc_library( "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", - "@llvm//:support", - "@local_config_mlir//:IR", - "@local_config_mlir//:StandardDialectRegistration", - "@local_config_mlir//:StandardOps", - "@local_config_mlir//:Support", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:StandardDialectRegistration", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Support", ], ) @@ -444,8 +447,8 @@ cc_library( "//tensorflow/stream_executor/lib", "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/strings", - "@llvm//:support", - "@local_config_mlir//:IR", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", ], ) @@ -454,10 +457,10 @@ cc_library( srcs = ["translate/translate_tf_dialect_op.cc"], deps = [ ":export_tf_dialect_op", - "@llvm//:support", - "@local_config_mlir//:IR", - "@local_config_mlir//:Support", - "@local_config_mlir//:Translation", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Support", + "@llvm-project//mlir:Translation", ], alwayslink = 1, ) @@ -474,9 +477,9 @@ cc_library( "//tensorflow/core:core_cpu_lib", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", - "@local_config_mlir//:Analysis", - "@local_config_mlir//:IR", - "@local_config_mlir//:StandardOps", + "@llvm-project//mlir:Analysis", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:StandardOps", ], alwayslink = 1, ) @@ -513,7 +516,7 @@ cc_library( "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/container:inlined_vector", 
"@com_google_absl//absl/strings", - "@llvm//:support", + "@llvm-project//llvm:support", ], ) @@ -529,9 +532,9 @@ cc_library( "//tensorflow/core:protos_all_cc", "//tensorflow/stream_executor/lib", "@com_google_absl//absl/strings", - "@llvm//:support", - "@local_config_mlir//:IR", - "@local_config_mlir//:Support", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Support", ], ) @@ -546,8 +549,8 @@ tf_cc_test( "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/stream_executor/lib", - "@llvm//:support", - "@local_config_mlir//:IR", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", ], ) @@ -565,8 +568,8 @@ cc_library( "//tensorflow/stream_executor/lib", "@com_google_absl//absl/container:inlined_vector", "@com_google_absl//absl/strings", - "@llvm//:support", - "@local_config_mlir//:IR", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", ], ) @@ -581,7 +584,7 @@ tf_cc_test( "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/stream_executor/lib", - "@local_config_mlir//:IR", + "@llvm-project//mlir:IR", ], ) @@ -603,8 +606,8 @@ cc_library( hdrs = ["utils/error_util.h"], deps = [ "//tensorflow/core:lib", - "@llvm//:support", - "@local_config_mlir//:IR", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", ], ) @@ -626,13 +629,14 @@ cc_library( "//tensorflow/c:tf_status", "//tensorflow/c/eager:c_api", "//tensorflow/core:framework", + "//tensorflow/core:lib", "//tensorflow/stream_executor", "//tensorflow/stream_executor/lib", - "@llvm//:support", - "@local_config_mlir//:Analysis", - "@local_config_mlir//:IR", - "@local_config_mlir//:Pass", - "@local_config_mlir//:Support", + "@llvm-project//llvm:support", + "@llvm-project//mlir:Analysis", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:Support", ], alwayslink = 1, ) @@ -642,7 +646,7 @@ cc_library( deps = [ ":tensorflow_dialect_registration", ":tf_dialect_passes", - "@local_config_mlir//:StandardDialectRegistration", + "@llvm-project//mlir:StandardDialectRegistration", ], ) @@ -661,9 +665,9 @@ cc_library( "//tensorflow/core:ops", "//tensorflow/core:protos_all_cc", "//tensorflow/stream_executor/lib", - "@llvm//:support", - "@local_config_mlir//:IR", - "@local_config_mlir//:Pass", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", ], alwayslink = 1, ) @@ -690,26 +694,23 @@ cc_library( "@com_google_absl//absl/container:inlined_vector", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", - "@llvm//:support", - "@local_config_mlir//:IR", - "@local_config_mlir//:Support", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Support", ], ) cc_library( name = "translate_lib", - srcs = [ - "translate/tf_mlir_translate.cc", - ], - hdrs = [ - "translate/tf_mlir_translate.h", - ], + srcs = ["translate/tf_mlir_translate.cc"], + hdrs = ["translate/tf_mlir_translate.h"], deps = [ ":convert_graphdef", ":error_util", ":import_utils", ":mangling_util", ":mlir_roundtrip_flags", + "//tensorflow/cc/saved_model:bundle_v2", "//tensorflow/core:graph", "//tensorflow/core:lib_proto_parsing", "//tensorflow/core:ops", @@ -718,10 +719,10 @@ cc_library( "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:span", - "@llvm//:support", - "@local_config_mlir//:IR", - "@local_config_mlir//:Parser", - "@local_config_mlir//:Pass", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Parser", + 
"@llvm-project//mlir:Pass", ], ) @@ -734,7 +735,7 @@ cc_library( "translate/tf_mlir_translate_cl.h", ], deps = [ - "@llvm//:support", + "@llvm-project//llvm:support", ], alwayslink = 1, ) @@ -751,9 +752,9 @@ cc_library( ":translate_lib", "//tensorflow/core:protos_all_cc", "//tensorflow/stream_executor/lib", - "@llvm//:support", - "@local_config_mlir//:IR", - "@local_config_mlir//:Translation", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Translation", ], alwayslink = 1, ) @@ -768,8 +769,8 @@ tf_cc_test( "//tensorflow/core:lib", "//tensorflow/core:test", "//tensorflow/core:test_main", - "@llvm//:support", - "@local_config_mlir//:IR", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", ], ) @@ -779,17 +780,17 @@ tf_native_cc_binary( "translate/derived_attr_populator_gen.cc", ], deps = [ - "@llvm//:support", - "@llvm//:tablegen", - "@local_config_mlir//:TableGen", + "@llvm-project//llvm:support", + "@llvm-project//llvm:tablegen", + "@llvm-project//mlir:TableGen", ], ) genrule( name = "derived_attr_populator_inc", srcs = [ - "@local_config_mlir//:include/mlir/Analysis/CallInterfaces.td", - "@local_config_mlir//:include/mlir/IR/OpBase.td", + "@llvm-project//mlir:include/mlir/Analysis/CallInterfaces.td", + "@llvm-project//mlir:include/mlir/IR/OpBase.td", "ir/tf_generated_ops.td", "ir/tf_op_base.td", "ir/tf_ops.td", @@ -798,7 +799,7 @@ genrule( "translate/derived_attr_populator.inc", ], cmd = ("$(location :derived_attr_populator_gen) " + - "-I external/local_config_mlir/include " + + "-I external/llvm-project/mlir/include " + "-I external/org_tensorflow " + "$(location //tensorflow/compiler/mlir/tensorflow:ir/tf_ops.td) " + " -o $@"), tools = [":derived_attr_populator_gen"], @@ -819,11 +820,11 @@ gentbl( "transforms/generated_optimize.inc", ), ], - tblgen = "@local_config_mlir//:mlir-tblgen", + tblgen = "@llvm-project//mlir:mlir-tblgen", td_file = "transforms/optimize.td", td_srcs = [ ":tensorflow_ops_td_files", - "@local_config_mlir//:StdOpsTdFiles", + "@llvm-project//mlir:StdOpsTdFiles", ], ) @@ -846,13 +847,13 @@ cc_library( "//tensorflow/core:framework", "//tensorflow/core/platform:logging", "//tensorflow/stream_executor/lib", - "@llvm//:support", - "@local_config_mlir//:IR", - "@local_config_mlir//:Parser", - "@local_config_mlir//:Pass", - "@local_config_mlir//:StandardOps", - "@local_config_mlir//:TransformUtils", - "@local_config_mlir//:Transforms", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Parser", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:TransformUtils", + "@llvm-project//mlir:Transforms", ], ) @@ -903,11 +904,11 @@ gentbl( "transforms/generated_lower_tf.inc", ), ], - tblgen = "@local_config_mlir//:mlir-tblgen", + tblgen = "@llvm-project//mlir:mlir-tblgen", td_file = "transforms/lower_tf.td", td_srcs = [ ":tensorflow_ops_td_files", - "@local_config_mlir//:StdOpsTdFiles", + "@llvm-project//mlir:StdOpsTdFiles", ], ) @@ -923,7 +924,8 @@ cc_library( ":lower_tf_inc_gen", ":tensorflow", "//tensorflow/core:framework", - "@local_config_mlir//:IR", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", ], alwayslink = 1, ) @@ -936,7 +938,7 @@ cc_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "@com_google_absl//absl/strings", - "@llvm//:support", + "@llvm-project//llvm:support", ], ) @@ -949,7 +951,7 @@ tf_cc_test( "//tensorflow/core:framework", "//tensorflow/core:test", "//tensorflow/core:test_main", - "@llvm//:support", + 
"@llvm-project//llvm:support", ], ) @@ -960,9 +962,9 @@ cc_library( deps = [ "//tensorflow/core:core_cpu_lib", "//tensorflow/core:framework", - "@llvm//:support", - "@local_config_mlir//:IR", - "@local_config_mlir//:Support", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Support", ], ) @@ -978,9 +980,9 @@ tf_cc_test( "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", "//tensorflow/core:test_main", - "@llvm//:support", - "@local_config_mlir//:IR", - "@local_config_mlir//:Support", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Support", ], ) @@ -991,8 +993,8 @@ cc_library( deps = [ "//tensorflow/core:lib", "//tensorflow/core/platform:logging", - "@llvm//:support", - "@local_config_mlir//:IR", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", ], ) @@ -1006,8 +1008,8 @@ tf_cc_test( "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core/platform:test", - "@llvm//:support", - "@local_config_mlir//:IR", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", ], ) @@ -1017,9 +1019,9 @@ cc_library( hdrs = ["utils/bridge_logger.h"], deps = [ ":dump_mlir_util", - "@llvm//:support", - "@local_config_mlir//:IR", - "@local_config_mlir//:Pass", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", ], ) @@ -1032,9 +1034,9 @@ cc_library( "//tensorflow/compiler/tf2xla:resource_operation_table", "//tensorflow/core:framework", "@com_google_absl//absl/strings", - "@llvm//:support", - "@local_config_mlir//:IR", - "@local_config_mlir//:Pass", - "@local_config_mlir//:Support", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:Support", ], ) diff --git a/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.cc b/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.cc index 36a2560b7c8..785f8e7f966 100644 --- a/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.cc +++ b/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.cc @@ -26,16 +26,16 @@ limitations under the License. 
#include "llvm/ADT/iterator_range.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Block.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/Value.h" // TF:local_config_mlir -#include "mlir/Support/LLVM.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Block.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/Value.h" // TF:llvm-project +#include "mlir/Support/LLVM.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_device.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" @@ -84,17 +84,17 @@ int64_t FindPassthroughArgumentForReturnValue(int64_t return_index, FuncOp func_op) { auto value = func_op.getBody().front().getTerminator()->getOperand(return_index); - assert(mlir::getElementTypeOrSelf(value->getType()).isa()); + assert(mlir::getElementTypeOrSelf(value.getType()).isa()); int64_t arg_index = -1; - auto try_parse_arg_index = [&arg_index](Value* v) { - auto resource_arg = llvm::dyn_cast(v); - if (resource_arg) arg_index = resource_arg->getArgNumber(); + auto try_parse_arg_index = [&arg_index](Value v) { + auto resource_arg = v.dyn_cast(); + if (resource_arg) arg_index = resource_arg.getArgNumber(); return arg_index; }; while (try_parse_arg_index(value) == -1) { - auto op = value->getDefiningOp(); + auto op = value.getDefiningOp(); assert(op); - int64_t res_num = llvm::dyn_cast(value)->getResultNumber(); + int64_t res_num = value.cast().getResultNumber(); if (auto graph = llvm::dyn_cast(op)) { value = graph.GetFetch().getOperand(res_num); } else if (auto island = llvm::dyn_cast(op)) { @@ -126,13 +126,13 @@ void ResourceAliasAnalysis::AnalyzeFunction(FuncOp func_op) { // Before having that, we assume function arguments do not alias each other. int64_t next_unique_id = 0; for (auto arg : func_op.getArguments()) { - if (!mlir::getElementTypeOrSelf(arg->getType()).isa()) + if (!mlir::getElementTypeOrSelf(arg.getType()).isa()) continue; resource_value_to_ids_[arg].insert(next_unique_id++); } llvm::StringMap var_handle_name_id_map; - auto forward_input_to_output = [&](Value* operand, Value* result) { - if (!mlir::getElementTypeOrSelf(result->getType()).isa()) + auto forward_input_to_output = [&](Value operand, Value result) { + if (!mlir::getElementTypeOrSelf(result.getType()).isa()) return; auto& result_ids = resource_value_to_ids_[result]; auto operand_it = resource_value_to_ids_.find(operand); @@ -161,8 +161,7 @@ void ResourceAliasAnalysis::AnalyzeFunction(FuncOp func_op) { // analysis. Inside that block, we can still treat its block arguments as // different resources. 
for (auto arg : replicate.GetBody().getArguments()) { - if (mlir::getElementTypeOrSelf(arg->getType()) - .isa()) { + if (mlir::getElementTypeOrSelf(arg.getType()).isa()) { resource_value_to_ids_[arg].insert(next_unique_id++); } } @@ -171,7 +170,7 @@ void ResourceAliasAnalysis::AnalyzeFunction(FuncOp func_op) { // If a result is a passthrough of the body input, use the corresponding // operand's resource IDs. for (auto result : llvm::enumerate(while_op.getResults())) { - if (!mlir::getElementTypeOrSelf(result.value()->getType()) + if (!mlir::getElementTypeOrSelf(result.value().getType()) .isa()) { continue; } @@ -192,7 +191,7 @@ void ResourceAliasAnalysis::AnalyzeFunction(FuncOp func_op) { // If a result is a passthrough of both branches' inputs, merge the // resource IDs of corresponding operands for the two inputs. for (auto result : llvm::enumerate(if_op.getResults())) { - if (!mlir::getElementTypeOrSelf(result.value()->getType()) + if (!mlir::getElementTypeOrSelf(result.value().getType()) .isa()) { continue; } @@ -211,7 +210,7 @@ void ResourceAliasAnalysis::AnalyzeFunction(FuncOp func_op) { } } else { for (auto result : op->getResults()) { - if (!mlir::getElementTypeOrSelf(result->getType()) + if (!mlir::getElementTypeOrSelf(result.getType()) .isa()) continue; resource_value_to_ids_[result].insert(kUnknownResourceId); @@ -220,7 +219,7 @@ void ResourceAliasAnalysis::AnalyzeFunction(FuncOp func_op) { }); } -bool ResourceAliasAnalysis::IsUnknownResource(const Value* resource) const { +bool ResourceAliasAnalysis::IsUnknownResource(const Value resource) const { auto it = resource_value_to_ids_.find(resource); assert(it != resource_value_to_ids_.end() && !it->getSecond().empty()); // The set is sorted so we only need to check the first element since @@ -231,7 +230,7 @@ bool ResourceAliasAnalysis::IsUnknownResource(const Value* resource) const { } const llvm::SmallSet& ResourceAliasAnalysis::GetResourceUniqueIds( - const Value* resource) const { + const Value resource) const { auto it = resource_value_to_ids_.find(resource); assert(it != resource_value_to_ids_.end() && "Unseen resource was queried"); return it->getSecond(); @@ -253,14 +252,14 @@ llvm::SmallDenseSet FindAccessedResources( llvm::SmallDenseSet resources; for (auto operand : op->getOperands()) { - if (!mlir::getElementTypeOrSelf(operand->getType()).isa()) + if (!mlir::getElementTypeOrSelf(operand.getType()).isa()) continue; if (alias_analysis.IsUnknownResource(operand)) return UnknownResourceSet(); const auto& ids = alias_analysis.GetResourceUniqueIds(operand); resources.insert(ids.begin(), ids.end()); } for (auto result : op->getResults()) { - if (!mlir::getElementTypeOrSelf(result->getType()).isa()) + if (!mlir::getElementTypeOrSelf(result.getType()).isa()) continue; if (alias_analysis.IsUnknownResource(result)) return UnknownResourceSet(); const auto& ids = alias_analysis.GetResourceUniqueIds(result); @@ -310,7 +309,21 @@ bool OpIsKnownToHaveNoSideEffect(Operation* op) { if (auto while_op = llvm::dyn_cast(op)) { return while_op.is_stateless(); } - return false; + + // Try to get the statefulness flag from the registry. + // + // TODO(yuanzx): Remove this after all ops are defined in the dialect. + if (op->getName().getDialect() != + TF::TensorFlowDialect::getDialectNamespace()) { + return false; + } + StringRef op_name = op->getName().getStringRef(); + // Drop the `tf.` prefix to query TF registry. 
+ auto node_name = + op_name.drop_front(TensorFlowDialect::getDialectNamespace().size() + 1); + const tensorflow::OpRegistrationData* op_reg_data = + tensorflow::OpRegistry::Global()->LookUp(node_name.data()); + return op_reg_data && !op_reg_data->op_def.is_stateful(); } } // namespace diff --git a/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.h b/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.h index 98df0941340..9457a3e8c6d 100644 --- a/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.h +++ b/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.h @@ -22,10 +22,10 @@ limitations under the License. #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/Region.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/Region.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project namespace mlir { namespace TF { @@ -42,12 +42,12 @@ class ResourceAliasAnalysis { ResourceAliasAnalysis(ResourceAliasAnalysis&&) = default; // Returns if the analysis fails to resolve a resource-type value. - bool IsUnknownResource(const Value* resource) const; + bool IsUnknownResource(const Value resource) const; // Returns the set unique IDs which `resource` could alias. Requires that // IsUnknownResource(resource) == true. const llvm::SmallSet& GetResourceUniqueIds( - const Value* resource) const; + const Value resource) const; private: ResourceAliasAnalysis() = default; @@ -56,7 +56,7 @@ class ResourceAliasAnalysis { void AnalyzeFunction(FuncOp func_op); // Maps each resource-type value to a set of unique IDs that it could alias. - llvm::SmallDenseMap, 8> + llvm::SmallDenseMap, 8> resource_value_to_ids_; }; diff --git a/tensorflow/compiler/mlir/tensorflow/ir/control_flow_ops.cc b/tensorflow/compiler/mlir/tensorflow/ir/control_flow_ops.cc index 08712a7929b..e4b797d349a 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/control_flow_ops.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/control_flow_ops.cc @@ -18,9 +18,9 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/ir/control_flow_ops.h" -#include "mlir/IR/DialectImplementation.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/OpImplementation.h" // TF:local_config_mlir +#include "mlir/IR/DialectImplementation.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/OpImplementation.h" // TF:llvm-project namespace mlir { namespace TFControlFlow { diff --git a/tensorflow/compiler/mlir/tensorflow/ir/control_flow_ops.h b/tensorflow/compiler/mlir/tensorflow/ir/control_flow_ops.h index d3cf173473b..59a1cc21b28 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/control_flow_ops.h +++ b/tensorflow/compiler/mlir/tensorflow/ir/control_flow_ops.h @@ -23,9 +23,9 @@ limitations under the License. 
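A note on the `OpIsKnownToHaveNoSideEffect` fallback added above: the same statefulness query can be reproduced from Python. A minimal sketch, assuming the private `op_def_registry.get` helper available in recent TF builds (names and the `tf.`-prefix handling mirror the C++ change, which strips the dialect namespace before the registry lookup):

```python
from tensorflow.python.framework import op_def_registry  # private API; sketch only

def is_known_side_effect_free(dialect_op_name: str) -> bool:
    """Rough Python analogue of the registry fallback in side_effect_analysis.cc."""
    # Drop the `tf.` dialect prefix, matching the C++ drop_front logic.
    prefix = "tf."
    if not dialect_op_name.startswith(prefix):
        return False
    op_def = op_def_registry.get(dialect_op_name[len(prefix):])  # None if unregistered
    return op_def is not None and not op_def.is_stateful

print(is_known_side_effect_free("tf.Add"))               # True: Add is stateless
print(is_known_side_effect_free("tf.AssignVariableOp"))  # False: stateful
```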
#ifndef TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_CONTROL_FLOW_OPS_H_ #define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_CONTROL_FLOW_OPS_H_ -#include "mlir/IR/Dialect.h" // TF:local_config_mlir -#include "mlir/IR/OpDefinition.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir +#include "mlir/IR/Dialect.h" // TF:llvm-project +#include "mlir/IR/OpDefinition.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project namespace mlir { namespace TFControlFlow { @@ -90,8 +90,8 @@ class EnterOp static StringRef getOperationName() { return "_tf.Enter"; } - Value *getData() { return getOperand(0); } - void setData(Value *value) { setOperand(0, value); } + Value getData() { return getOperand(0); } + void setData(Value value) { setOperand(0, value); } LogicalResult verify(); }; @@ -172,8 +172,8 @@ class NextIterationSinkOp static StringRef getOperationName() { return "_tf.NextIteration.sink"; } - Value *getData() { return getOperand(0); } - void setData(Value *value) { setOperand(0, value); } + Value getData() { return getOperand(0); } + void setData(Value value) { setOperand(0, value); } LogicalResult verify(); }; @@ -202,8 +202,8 @@ class LoopCondOp using Op::Op; static StringRef getOperationName() { return "_tf.LoopCond"; } - Value *getData() { return getOperand(0); } - void setData(Value *value) { setOperand(0, value); } + Value getData() { return getOperand(0); } + void setData(Value value) { setOperand(0, value); } LogicalResult verify(); }; @@ -233,11 +233,11 @@ class SwitchOp : public Op::Impl, static StringRef getOperationName() { return "_tf.Switch"; } - Value *getData() { return getOperand(0); } - void setData(Value *value) { setOperand(0, value); } + Value getData() { return getOperand(0); } + void setData(Value value) { setOperand(0, value); } - Value *getPredicate() { return getOperand(1); } - void setPredicate(Value *value) { setOperand(1, value); } + Value getPredicate() { return getOperand(1); } + void setPredicate(Value value) { setOperand(1, value); } LogicalResult verify(); }; @@ -266,8 +266,8 @@ class ExitOp : public Op::Impl, using Op::Op; static StringRef getOperationName() { return "_tf.Exit"; } - Value *getData() { return getOperand(0); } - void setData(Value *value) { setOperand(0, value); } + Value getData() { return getOperand(0); } + void setData(Value value) { setOperand(0, value); } LogicalResult verify(); }; diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_device.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_device.cc index ffba86e78ff..b313b06bd3b 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_device.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_device.cc @@ -25,19 +25,19 @@ limitations under the License. 
#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/SMLoc.h" -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/OpDefinition.h" // TF:local_config_mlir -#include "mlir/IR/OpImplementation.h" // TF:local_config_mlir -#include "mlir/IR/OperationSupport.h" // TF:local_config_mlir -#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/TypeUtilities.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir -#include "mlir/IR/Value.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir -#include "mlir/Support/STLExtras.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/OpDefinition.h" // TF:llvm-project +#include "mlir/IR/OpImplementation.h" // TF:llvm-project +#include "mlir/IR/OperationSupport.h" // TF:llvm-project +#include "mlir/IR/PatternMatch.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/TypeUtilities.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project +#include "mlir/IR/Value.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project +#include "mlir/Support/STLExtras.h" // TF:llvm-project #include "tensorflow/core/platform/logging.h" namespace mlir { @@ -183,12 +183,12 @@ void Print(ReplicateOp op, OpAsmPrinter* p) { if (op.getNumOperands()) { *p << '('; Block& block = op.body().front(); - interleaveComma(block.getArguments(), *p, [&](BlockArgument* arg) { - const int block_arg_num = arg->getArgNumber(); + interleaveComma(block.getArguments(), *p, [&](BlockArgument arg) { + const int block_arg_num = arg.getArgNumber(); *p << '['; p->printOperands(std::next(op.operand_begin(), block_arg_num * n), std::next(op.operand_begin(), (block_arg_num + 1) * n)); - *p << "] as " << *arg << ": " << arg->getType(); + *p << "] as " << arg << ": " << arg.getType(); }); *p << ')'; } @@ -229,13 +229,13 @@ LogicalResult Verify(ReplicateOp op) { // Check replicated input types match block argument types. 
for (auto block_arg : block.getArguments()) { - Type block_arg_type = block_arg->getType(); - for (int i = n * block_arg->getArgNumber(), e = i + n; i < e; ++i) + Type block_arg_type = block_arg.getType(); + for (int i = n * block_arg.getArgNumber(), e = i + n; i < e; ++i) if (failed(VerifyCompatibleTypes(block_arg_type, - op.getOperand(i)->getType()))) + op.getOperand(i).getType()))) return op.emitOpError() << "incompatible types for operand " << i - << " and block argument " << block_arg->getArgNumber(); + << " and block argument " << block_arg.getArgNumber(); } Operation& terminator = block.back(); @@ -280,9 +280,9 @@ void BuildReplicateOp( for (auto& replicated_input : replicated_inputs) { DCHECK_EQ(llvm::size(replicated_input.first), n); - for (auto* input : replicated_input.first) { + for (auto input : replicated_input.first) { DCHECK(succeeded( - VerifyCompatibleTypes(input->getType(), replicated_input.second))); + VerifyCompatibleTypes(input.getType(), replicated_input.second))); state->addOperands(input); } block.addArgument(replicated_input.second); @@ -296,7 +296,7 @@ void BuildReplicateOp( void ReplicateOp::build( Builder* builder, OperationState& state, int n, llvm::ArrayRef devices, - llvm::ArrayRef, Type>> replicated_inputs, + llvm::ArrayRef, Type>> replicated_inputs, llvm::ArrayRef replica_output_types) { BuildReplicateOp(builder, &state, n, devices, replicated_inputs, replica_output_types); diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_device.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_device.h index 91370bc6501..a500af45c44 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_device.h +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_device.h @@ -19,8 +19,8 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_TF_DEVICE_H_ #define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_TF_DEVICE_H_ -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Dialect.h" // TF:local_config_mlir +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Dialect.h" // TF:llvm-project namespace mlir { namespace tf_device { diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_device_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_device_ops.td index 403932ed9a8..88cc08aca6d 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_device_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_device_ops.td @@ -185,7 +185,7 @@ For example: let builders = [ OpBuilder<"Builder* builder, OperationState& state, int n, " "llvm::ArrayRef devices, " - "llvm::ArrayRef, Type>>" + "llvm::ArrayRef, Type>>" " replicated_inputs, " "llvm::ArrayRef replica_output_types">, OpBuilder<"Builder* builder, OperationState& state, int n, " diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc index 5a018a39fd7..13dc2993371 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc @@ -26,23 +26,23 @@ limitations under the License. 
#include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Casting.h" #include "llvm/Support/FormatVariadic.h" -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/Dialect/Traits.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/DialectImplementation.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Matchers.h" // TF:local_config_mlir -#include "mlir/IR/OpDefinition.h" // TF:local_config_mlir -#include "mlir/IR/OpImplementation.h" // TF:local_config_mlir -#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir -#include "mlir/IR/Value.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir -#include "mlir/Transforms/FoldUtils.h" // TF:local_config_mlir -#include "mlir/Transforms/InliningUtils.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/Dialect/Traits.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/DialectImplementation.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Matchers.h" // TF:llvm-project +#include "mlir/IR/OpDefinition.h" // TF:llvm-project +#include "mlir/IR/OpImplementation.h" // TF:llvm-project +#include "mlir/IR/PatternMatch.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project +#include "mlir/IR/Value.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project +#include "mlir/Transforms/FoldUtils.h" // TF:llvm-project +#include "mlir/Transforms/InliningUtils.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" namespace mlir { @@ -167,7 +167,7 @@ namespace { LogicalResult VerifyControlOperandsAfterAllData(Operation *op) { bool found_control = false; for (int operand_idx : llvm::seq(0, op->getNumOperands())) { - if (op->getOperand(operand_idx)->getType().isa()) { + if (op->getOperand(operand_idx).getType().isa()) { found_control = true; continue; } @@ -216,9 +216,9 @@ LogicalResult Verify(GraphOp graph) { return fetch.emitOpError() << "does not have enough operands to cover the " "graph returned values"; for (int i : llvm::seq(0, fetch.getNumOperands())) { - Value *operand = fetch.getOperand(i); + Value operand = fetch.getOperand(i); // Break out of the loop at the first control operand encountered. 
- if (operand->getType().isa()) { + if (operand.getType().isa()) { if (i != graph.getNumResults()) return fetch.emitOpError() << "operand #" << i @@ -228,7 +228,7 @@ LogicalResult Verify(GraphOp graph) { if (i >= graph.getNumResults()) return fetch.emitOpError() << "operand #" << i << " does not have a graph results to bind"; - if (graph.getResult(i)->getType() != operand->getType()) + if (graph.getResult(i).getType() != operand.getType()) return fetch.emitOpError() << "operand #" << i << " type mismatch graph results"; } @@ -331,8 +331,8 @@ LogicalResult Verify(IslandOp island) { << "has " << yield.getNumOperands() << " operand, but island returns " << result_count; for (int operand_idx : llvm::seq(0, yield.getNumOperands())) { - if (island.getResult(operand_idx)->getType() != - yield.getOperand(operand_idx)->getType()) + if (island.getResult(operand_idx).getType() != + yield.getOperand(operand_idx).getType()) return yield.emitOpError() << "operand #" << operand_idx << " type mismatch island results"; } @@ -340,7 +340,7 @@ LogicalResult Verify(IslandOp island) { // Check that there aren't any control results other than the last one. Type control_type = ControlType::get(island.getContext()); for (int operand_idx : llvm::seq(0, island.getNumResults() - 1)) { - if (island.getResult(operand_idx)->getType() == control_type) + if (island.getResult(operand_idx).getType() == control_type) return yield.emitOpError() << "unexpected control type for operand #" << operand_idx; } @@ -503,12 +503,12 @@ ParseResult ParseSwitchOp(OpAsmParser &parser, OperationState &result) { void Print(SwitchOp switch_op, OpAsmPrinter &p) { p << switch_op.getOperationName() << ' '; p.printOperands(switch_op.getOperands()); - Type data_operand_ty = switch_op.data()->getType(); + Type data_operand_ty = switch_op.data().getType(); // If the types aren't perfectly matching, print the functional type syntax // else print the shorter single type. p << " : "; - if (switch_op.trueOutput()->getType() != data_operand_ty || - switch_op.falseOutput()->getType() != data_operand_ty) { + if (switch_op.trueOutput().getType() != data_operand_ty || + switch_op.falseOutput().getType() != data_operand_ty) { p.printFunctionalType(switch_op.getOperation()); } else { p << switch_op.getType(0); @@ -535,12 +535,12 @@ LogicalResult Verify(SwitchNOp switchn) { << "expect `num_outs` (" << num_outs.getInt() << ") results but got " << (switchn.getNumResults() - 1); - auto operand0_type = switchn.getOperand(0)->getType(); - for (Value *result : switchn.outputs()) - if (operand0_type != result->getType()) + auto operand0_type = switchn.getOperand(0).getType(); + for (Value result : switchn.outputs()) + if (operand0_type != result.getType()) return switchn.emitOpError() << "type mismatch between data operand and result: " - << operand0_type << " vs " << result->getType(); + << operand0_type << " vs " << result.getType(); return success(); } @@ -616,12 +616,12 @@ LogicalResult Verify(MergeOp merge) { if (!merge.getNumOperands()) return merge.emitOpError() << "expects at least one operand"; - Type data_type = merge.getOperand(0)->getType(); + Type data_type = merge.getOperand(0).getType(); if (data_type.isa()) return merge.emitOpError() << "expects a non-control input"; // Check that each operand can be individually broadcasted to the output type. 
- Type output_type = merge.output()->getType(); + Type output_type = merge.output().getType(); TensorType output_tensor_ty = output_type.dyn_cast(); if (!output_tensor_ty) { return merge.emitOpError() @@ -666,7 +666,7 @@ void Print(MergeOp merge, OpAsmPrinter &p) { bool use_short_form = true; int num_data_operands = 0; - Type output_type = merge.output()->getType(); + Type output_type = merge.output().getType(); for (Type operand_type : merge.getOperandTypes()) { if (operand_type.isa()) break; num_data_operands++; @@ -750,7 +750,7 @@ void Print(EnterOp enter, OpAsmPrinter &p) { // If the types aren't perfectly matching, print the functional type syntax // else print the shorter single type. p << " : "; - if (enter.data()->getType() != enter.output()->getType()) { + if (enter.data().getType() != enter.output().getType()) { p.printFunctionalType(enter.getOperation()); } else { p << enter.getType(0); @@ -824,10 +824,10 @@ ParseResult ParseEnterOp(OpAsmParser &parser, OperationState &result) { namespace { LogicalResult Verify(NextIterationSourceOp source) { - Value *token = source.token(); - if (!token->hasOneUse()) + Value token = source.token(); + if (!token.hasOneUse()) return source.emitOpError() << "expects a single user for produced token"; - if (!isa(*token->user_begin())) + if (!isa(*token.user_begin())) return source.emitOpError() << "token should be consumed by a sink op"; return success(); } @@ -858,8 +858,8 @@ ParseResult ParseNextIterationSourceOp(OpAsmParser &parser, namespace { LogicalResult Verify(NextIterationSinkOp sink) { - Value *token = sink.token(); - Operation *definingOp = token->getDefiningOp(); + Value token = sink.token(); + Operation *definingOp = token.getDefiningOp(); if (!definingOp) return sink.emitOpError() << "expects a token directly produced by a " "tf_executor.NextIteration.Source op: "; @@ -867,11 +867,11 @@ LogicalResult Verify(NextIterationSinkOp sink) { if (!source) return sink.emitOpError() << "expects a token produced by a " "tf_executor.NextIteration.Source op: "; - if (source.output()->getType() != sink.input()->getType()) + if (source.output().getType() != sink.input().getType()) return sink.emitOpError() - << "input type " << sink.input()->getType() + << "input type " << sink.input().getType() << " mismatch the tf_executor.NextIteration.Source output type: " - << source.output()->getType(); + << source.output().getType(); return success(); } @@ -880,7 +880,7 @@ void Print(NextIterationSinkOp next_iteration, OpAsmPrinter &p) { p.printOperand(next_iteration.getOperand(0)); p << "] "; p.printOperands(llvm::drop_begin(next_iteration.getOperands(), 1)); - p << " : " << next_iteration.getOperand(1)->getType(); + p << " : " << next_iteration.getOperand(1).getType(); p.printOptionalAttrDict(next_iteration.getAttrs()); } @@ -980,11 +980,11 @@ void Print(LoopCondOp loop_cond, OpAsmPrinter &p) { p.printOperands(loop_cond.getOperands()); // If the types aren't matching (broadcast), print the functional type syntax. - if (loop_cond.input()->getType() != loop_cond.output()->getType()) { + if (loop_cond.input().getType() != loop_cond.output().getType()) { p << " : "; p.printFunctionalType(loop_cond.getOperation()); } else { - p << " : " << loop_cond.input()->getType(); + p << " : " << loop_cond.input().getType(); } p.printOptionalAttrDict(loop_cond.getAttrs()); @@ -1087,18 +1087,18 @@ struct HoistInnerOpsSingleIslandGraph : public OpRewritePattern { YieldOp yield_op = island_op.GetYield(); // Map graph results to inner ops results of single island. 
- llvm::SmallVector new_rets; - for (Value *operand : fetch_op.fetches()) { + llvm::SmallVector new_rets; + for (Value operand : fetch_op.fetches()) { // Control results should not be propagated out. - if (operand->getType().isa()) break; + if (operand.getType().isa()) break; - if (operand->getDefiningOp() != island_op) { + if (operand.getDefiningOp() != island_op) { // Operand is not from island, simply propagate it out. new_rets.push_back(operand); } else { // Lookup yield operand in island for inner op result. - auto result = llvm::cast(operand); - new_rets.push_back(yield_op.getOperand(result->getResultNumber())); + auto result = operand.cast(); + new_rets.push_back(yield_op.getOperand(result.getResultNumber())); } } @@ -1138,7 +1138,7 @@ struct DropEmptyIslandNoOperandNoDataResult !HasSingleOpInBlock(&op.GetBody())) return matchFailure(); - for (auto &use : llvm::make_early_inc_range(op.control()->getUses())) + for (auto &use : llvm::make_early_inc_range(op.control().getUses())) use.getOwner()->eraseOperand(use.getOperandNumber()); rewriter.eraseOp(op); @@ -1158,7 +1158,7 @@ struct DropEmptyIslandNoOperandOneDataResult PatternMatchResult matchAndRewrite(IslandOp op, PatternRewriter &rewriter) const override { if (op.getNumOperands() != 0 || op.getNumResults() != 2 || - !op.control()->use_empty() || + !op.control().use_empty() || !HasSingleOpInBlock(&op.GetBody())) return matchFailure(); @@ -1193,7 +1193,7 @@ struct DropEmptyControlTrigger : public OpRewritePattern { PatternRewriter &rewriter) const override { if (op.getNumOperands() != 0) return matchFailure(); - for (auto &use : llvm::make_early_inc_range(op.control()->getUses())) + for (auto &use : llvm::make_early_inc_range(op.control().getUses())) use.getOwner()->eraseOperand(use.getOperandNumber()); rewriter.eraseOp(op); diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h index 8df3ecb2559..b7d8549ece7 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h @@ -21,13 +21,13 @@ limitations under the License. 
#ifndef TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_TF_EXECUTOR_H_ #define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_TF_EXECUTOR_H_ -#include "mlir/Dialect/Traits.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Dialect.h" // TF:local_config_mlir -#include "mlir/IR/Matchers.h" // TF:local_config_mlir -#include "mlir/IR/OpImplementation.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir +#include "mlir/Dialect/Traits.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Dialect.h" // TF:llvm-project +#include "mlir/IR/Matchers.h" // TF:llvm-project +#include "mlir/IR/OpImplementation.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" namespace mlir { diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_executor_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_executor_ops.td index 0f243957869..3922981bd50 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_executor_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_executor_ops.td @@ -460,7 +460,7 @@ def TfExecutor_NextIterationSourceOp : TfExecutor_Op<"NextIteration.Source", let extraClassDeclaration = [{ NextIterationSinkOp GetSink() { - return cast(*token()->user_begin()); + return cast(*token().user_begin()); } }]; @@ -514,8 +514,8 @@ def TfExecutor_NextIterationSinkOp : TfExecutor_Op<"NextIteration.Sink", ); let builders = [OpBuilder< - "Builder *builder, OperationState &result, Value *token, " - "ArrayRef operands, ArrayRef attributes = {}", + "Builder *builder, OperationState &result, Value token, " + "ArrayRef operands, ArrayRef attributes = {}", [{ assert(operands.size() >= 1 && "tf_executor.NextIteration.Sink builder " "expects at least one operand"); @@ -594,7 +594,7 @@ def TfExecutor_ControlTriggerOp : TfExecutor_Op<"ControlTrigger", let builders = [OpBuilder< "Builder *builder, OperationState &result, " - "ArrayRef operands, ArrayRef attributes = {}", + "ArrayRef operands, ArrayRef attributes = {}", [{ assert(operands.size() >= 1 && "tf_executor.ControlTrigger builder " "expects at least one operand"); diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td index 7257a1ba8f0..9b3d749864c 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td @@ -821,6 +821,40 @@ def TF_ConcatOp : TF_Op<"Concat", [NoSideEffect]> { }]; } +def TF_ConcatOffsetOp : TF_Op<"ConcatOffset", [NoSideEffect]> { + let summary = "Computes offsets of concat inputs within its output."; + + let description = [{ +For example: + +``` +# 'x' is [2, 2, 7] +# 'y' is [2, 3, 7] +# 'z' is [2, 5, 7] +concat_offset(2, [x, y, z]) => [0, 0, 0], [0, 2, 0], [0, 5, 0] +``` + +This is typically used by gradient computations for a concat operation. 
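A runnable counterpart to the `ConcatOffset` example above, sketched against the `tf.raw_ops.ConcatOffset` endpoint (assumed available; note the offsets shown correspond to concatenation along axis 1, the axis where the three shapes differ):

```python
import tensorflow as tf

# Shapes of the inputs being concatenated, as in the description above.
x_shape = tf.constant([2, 2, 7])
y_shape = tf.constant([2, 3, 7])
z_shape = tf.constant([2, 5, 7])

# Each offset is the cumulative size of the preceding inputs (0, 2, 2+3=5),
# placed in the concat-axis slot.
offsets = tf.raw_ops.ConcatOffset(concat_dim=1,
                                  shape=[x_shape, y_shape, z_shape])
print([o.numpy().tolist() for o in offsets])
# [[0, 0, 0], [0, 2, 0], [0, 5, 0]]
```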
+ }]; + + let arguments = (ins + I32Tensor:$concat_dim, + Variadic:$shape + ); + + let results = (outs + Variadic:$offset + ); + + TF_DerivedOperandSizeAttr N = TF_DerivedOperandSizeAttr<1>; + + let verifier = [{ + return Verify(*this); + }]; + + let hasFolder = 1; +} + def TF_ConcatV2Op : TF_Op<"ConcatV2", [NoSideEffect]> { let summary = "Concatenates tensors along one dimension."; @@ -1350,6 +1384,10 @@ as illustrated on the following example: TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<1>; TF_DerivedOperandSizeAttr N = TF_DerivedOperandSizeAttr<0>; + + let verifier = [{ + return Verify(*this); + }]; } def TF_EinsumOp : TF_Op<"Einsum", [NoSideEffect]> { @@ -1506,8 +1544,8 @@ tf.math.equal(x, y) ==> array([True, True]) TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; let builders = [ - OpBuilder<"Builder* builder, OperationState& result, Value* x, " - "Value* y, BoolAttr incompatible_shape_error"> + OpBuilder<"Builder* builder, OperationState& result, Value x, " + "Value y, BoolAttr incompatible_shape_error"> ]; let verifier = [{ @@ -1607,6 +1645,11 @@ size 1. TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; TF_DerivedOperandTypeAttr Tdim = TF_DerivedOperandTypeAttr<1>; + + let builders = [ + OpBuilder<"Builder* builder, OperationState& result, Value condition, " + "Value dim"> + ]; } def TF_FakeQuantWithMinMaxArgsOp : TF_Op<"FakeQuantWithMinMaxArgs", [NoSideEffect, SameOperandsAndResultType]> { @@ -1883,6 +1926,102 @@ The size of 1D Tensors matches the dimension C of the 4D Tensors. }]; } +def TF_FusedBatchNormGradOp : TF_Op<"FusedBatchNormGrad", [NoSideEffect]> { + let summary = "Gradient for batch normalization."; + + let description = [{ +Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW". +The size of 1D Tensors matches the dimension C of the 4D Tensors. + }]; + + let arguments = (ins + F32Tensor:$y_backprop, + F32Tensor:$x, + F32Tensor:$scale, + F32Tensor:$reserve_space_1, + F32Tensor:$reserve_space_2, + + DefaultValuedAttr:$epsilon, + DefaultValuedAttr:$data_format, + DefaultValuedAttr:$is_training + ); + + let results = (outs + F32Tensor:$x_backprop, + F32Tensor:$scale_backprop, + F32Tensor:$offset_backprop, + F32Tensor:$reserve_space_3, + F32Tensor:$reserve_space_4 + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; +} + +def TF_FusedBatchNormGradV2Op : TF_Op<"FusedBatchNormGradV2", [NoSideEffect]> { + let summary = "Gradient for batch normalization."; + + let description = [{ +Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW". +The size of 1D Tensors matches the dimension C of the 4D Tensors. + }]; + + let arguments = (ins + TensorOf<[BF16, F16, F32]>:$y_backprop, + TensorOf<[BF16, F16, F32]>:$x, + F32Tensor:$scale, + F32Tensor:$reserve_space_1, + F32Tensor:$reserve_space_2, + + DefaultValuedAttr:$epsilon, + DefaultValuedAttr:$data_format, + DefaultValuedAttr:$is_training + ); + + let results = (outs + TensorOf<[BF16, F16, F32]>:$x_backprop, + F32Tensor:$scale_backprop, + F32Tensor:$offset_backprop, + F32Tensor:$reserve_space_3, + F32Tensor:$reserve_space_4 + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; + TF_DerivedOperandTypeAttr U = TF_DerivedOperandTypeAttr<3>; +} + +def TF_FusedBatchNormGradV3Op : TF_Op<"FusedBatchNormGradV3", [NoSideEffect]> { + let summary = "Gradient for batch normalization."; + + let description = [{ +Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW". +The size of 1D Tensors matches the dimension C of the 4D Tensors. 
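To make the operand/result layout of the new `FusedBatchNormGrad*` entries concrete, here is a sketch through the raw-op endpoints. It assumes the `tf.raw_ops.FusedBatchNormV3`/`FusedBatchNormGradV3` keyword names match the ODS above and that empty `mean`/`variance` tensors are accepted when `is_training=True`:

```python
import tensorflow as tf

x = tf.random.normal([2, 4, 4, 3])         # NHWC input, C = 3
scale = tf.ones([3])
offset = tf.zeros([3])
empty = tf.constant([], dtype=tf.float32)  # batch stats are computed in training

# Forward pass produces the reserve spaces the gradient op consumes.
y, bm, bv, r1, r2, r3 = tf.raw_ops.FusedBatchNormV3(
    x=x, scale=scale, offset=offset, mean=empty, variance=empty,
    is_training=True)

# Gradient: operands and results mirror the ODS (y_backprop, x, scale,
# reserve_space_1..3 -> x/scale/offset backprops plus two reserve outputs).
dx, dscale, doffset, _, _ = tf.raw_ops.FusedBatchNormGradV3(
    y_backprop=tf.ones_like(y), x=x, scale=scale,
    reserve_space_1=r1, reserve_space_2=r2, reserve_space_3=r3,
    is_training=True)
print(dx.shape, dscale.shape, doffset.shape)  # (2, 4, 4, 3) (3,) (3,)
```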
+ }]; + + let arguments = (ins + TensorOf<[BF16, F16, F32]>:$y_backprop, + TensorOf<[BF16, F16, F32]>:$x, + F32Tensor:$scale, + F32Tensor:$reserve_space_1, + F32Tensor:$reserve_space_2, + F32Tensor:$reserve_space_3, + + DefaultValuedAttr:$epsilon, + DefaultValuedAttr:$data_format, + DefaultValuedAttr:$is_training + ); + + let results = (outs + TensorOf<[BF16, F16, F32]>:$x_backprop, + F32Tensor:$scale_backprop, + F32Tensor:$offset_backprop, + F32Tensor:$reserve_space_4, + F32Tensor:$reserve_space_5 + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; + TF_DerivedOperandTypeAttr U = TF_DerivedOperandTypeAttr<3>; +} + def TF_FusedBatchNormV3Op : TF_Op<"FusedBatchNormV3", [NoSideEffect]> { let summary = "Batch normalization."; @@ -2455,6 +2594,55 @@ def TF_LeakyReluOp : TF_Op<"LeakyRelu", [NoSideEffect, SameOperandsAndResultType let hasFolder = 1; } +def TF_LeftShiftOp : TF_Op<"LeftShift", [Broadcastable, NoSideEffect]>, + WithBroadcastableBinOpBuilder { + let summary = "Elementwise computes the bitwise left-shift of `x` and `y`."; + + let description = [{ +If `y` is negative, or greater than or equal to the width of `x` in bits the +result is implementation defined. + +Example: + +```python +import tensorflow as tf +from tensorflow.python.ops import bitwise_ops +import numpy as np +dtype_list = [tf.int8, tf.int16, tf.int32, tf.int64] + +for dtype in dtype_list: + lhs = tf.constant([-1, -5, -3, -14], dtype=dtype) + rhs = tf.constant([5, 0, 7, 11], dtype=dtype) + + left_shift_result = bitwise_ops.left_shift(lhs, rhs) + + print(left_shift_result) + +# This will print: +# tf.Tensor([ -32 -5 -128 0], shape=(4,), dtype=int8) +# tf.Tensor([ -32 -5 -384 -28672], shape=(4,), dtype=int16) +# tf.Tensor([ -32 -5 -384 -28672], shape=(4,), dtype=int32) +# tf.Tensor([ -32 -5 -384 -28672], shape=(4,), dtype=int64) + +lhs = np.array([-2, 64, 101, 32], dtype=np.int8) +rhs = np.array([-1, -5, -3, -14], dtype=np.int8) +bitwise_ops.left_shift(lhs, rhs) +# +``` + }]; + + let arguments = (ins + TF_IntTensor:$x, + TF_IntTensor:$y + ); + + let results = (outs + TF_IntTensor:$z + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; +} + def TF_LessOp : TF_Op<"Less", [Broadcastable, NoSideEffect]>, WithBroadcastableCmpOpBuilder { let summary = "Returns the truth value of (x < y) element-wise."; @@ -2548,6 +2736,31 @@ tf.math.log(x) ==> [-inf, -0.6931472, 0. , 1.609438] let hasCanonicalizer = 1; } +def TF_Log1pOp : TF_Op<"Log1p", [NoSideEffect, SameOperandsAndResultType]> { + let summary = "Computes natural logarithm of (1 + x) element-wise."; + + let description = [{ +I.e., \\(y = \log_e (1 + x)\\). + +Example: + +```python +x = tf.constant([0, 0.5, 1, 5]) +tf.math.log1p(x) ==> [0., 0.4054651, 0.6931472, 1.7917595] +``` + }]; + + let arguments = (ins + TF_FpOrComplexTensor:$x + ); + + let results = (outs + TF_FpOrComplexTensor:$y + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; +} + def TF_LogSoftmaxOp : TF_Op<"LogSoftmax", [NoSideEffect, SameOperandsAndResultType]> { let summary = "Computes log softmax activations."; @@ -3165,8 +3378,8 @@ retained with length 1. 
TF_DerivedOperandTypeAttr Tidx = TF_DerivedOperandTypeAttr<1>; let builders = [OpBuilder< - "Builder *builder, OperationState &result, Value *input, " - "Value *reduction_indices, BoolAttr keep_dims" + "Builder *builder, OperationState &result, Value input, " + "Value reduction_indices, BoolAttr keep_dims" >]; } @@ -3577,8 +3790,8 @@ def TF_NotEqualOp : TF_Op<"NotEqual", [Commutative, NoSideEffect]> { TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; let builders = [ - OpBuilder<"Builder* builder, OperationState& result, Value* x, " - "Value* y, BoolAttr incompatible_shape_error"> + OpBuilder<"Builder* builder, OperationState& result, Value x, " + "Value y, BoolAttr incompatible_shape_error"> ]; let verifier = [{ @@ -3695,6 +3908,12 @@ output = TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<2>; TF_DerivedOperandTypeAttr TI = TF_DerivedOperandTypeAttr<0>; + let builders = [ + OpBuilder<"Builder* builder, OperationState& result, Value indices, " + "Value depth, Value on_value, Value off_value, " + "IntegerAttr axis"> + ]; + let verifier = [{ return Verify(*this); }]; @@ -4125,8 +4344,8 @@ tf.range(start, limit, delta) ==> [3, 6, 9, 12, 15] TF_DerivedOperandTypeAttr Tidx = TF_DerivedOperandTypeAttr<0>; let builders = [ - OpBuilder<"Builder* builder, OperationState& result, Value* start, " - "Value* limit, Value* delta"> + OpBuilder<"Builder* builder, OperationState& result, Value start, " + "Value limit, Value delta"> ]; } @@ -4160,7 +4379,7 @@ of the tensor. Rank is also known as "order", "degree", or "ndims." TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; let builders = [ - OpBuilder<"Builder* builder, OperationState& result, Value* input"> + OpBuilder<"Builder* builder, OperationState& result, Value input"> ]; } @@ -4396,7 +4615,7 @@ reshape(t, []) ==> 7 let builders = [ OpBuilder< - "Builder* builder, OperationState& result, Value* tensor, Value* shape"> + "Builder* builder, OperationState& result, Value tensor, Value shape"> ]; let verifier = [{ @@ -4666,6 +4885,58 @@ reverse(t, dims) ==> [[[[8, 9, 10, 11], TF_DerivedOperandTypeAttr Tidx = TF_DerivedOperandTypeAttr<1>; } +def TF_RightShiftOp : TF_Op<"RightShift", [Broadcastable, NoSideEffect]>, + WithBroadcastableBinOpBuilder { + let summary = "Elementwise computes the bitwise right-shift of `x` and `y`."; + + let description = [{ +Performs a logical shift for unsigned integer types, and an arithmetic shift +for signed integer types. + +If `y` is negative, or greater than or equal to than the width of `x` in bits +the result is implementation defined. 
+ +Example: + +```python +import tensorflow as tf +from tensorflow.python.ops import bitwise_ops +import numpy as np +dtype_list = [tf.int8, tf.int16, tf.int32, tf.int64] + +for dtype in dtype_list: + lhs = tf.constant([-1, -5, -3, -14], dtype=dtype) + rhs = tf.constant([5, 0, 7, 11], dtype=dtype) + + right_shift_result = bitwise_ops.right_shift(lhs, rhs) + + print(right_shift_result) + +# This will print: +# tf.Tensor([-1 -5 -1 -1], shape=(4,), dtype=int8) +# tf.Tensor([-1 -5 -1 -1], shape=(4,), dtype=int16) +# tf.Tensor([-1 -5 -1 -1], shape=(4,), dtype=int32) +# tf.Tensor([-1 -5 -1 -1], shape=(4,), dtype=int64) + +lhs = np.array([-2, 64, 101, 32], dtype=np.int8) +rhs = np.array([-1, -5, -3, -14], dtype=np.int8) +bitwise_ops.right_shift(lhs, rhs) +# +``` + }]; + + let arguments = (ins + TF_IntTensor:$x, + TF_IntTensor:$y + ); + + let results = (outs + TF_IntTensor:$z + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; +} + def TF_RoundOp : TF_Op<"Round", [NoSideEffect, SameOperandsAndResultType]> { let summary = [{ Rounds the values of a tensor to the nearest integer, element-wise. @@ -4725,6 +4996,212 @@ is the corresponding input gradient. TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } +def TF_SegmentMaxOp : TF_Op<"SegmentMax", [NoSideEffect]> { + let summary = "Computes the maximum along segments of a tensor."; + + let description = [{ +Read +[the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) +for an explanation of segments. + +Computes a tensor such that +\\(output_i = \max_j(data_j)\\) where `max` is over `j` such +that `segment_ids[j] == i`. + +If the max is empty for a given segment ID `i`, `output[i] = 0`. + +
+ +For example: + +``` +c = tf.constant([[1,2,3,4], [4, 3, 2, 1], [5,6,7,8]]) +tf.segment_max(c, tf.constant([0, 0, 1])) +# ==> [[4, 3, 3, 4], +# [5, 6, 7, 8]] +``` + }]; + + let arguments = (ins + TF_IntOrFpTensor:$data, + TF_I32OrI64Tensor:$segment_ids + ); + + let results = (outs + TF_IntOrFpTensor:$output + ); + + TF_DerivedOperandTypeAttr Tindices = TF_DerivedOperandTypeAttr<1>; + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; +} + +def TF_SegmentMeanOp : TF_Op<"SegmentMean", [NoSideEffect]> { + let summary = "Computes the mean along segments of a tensor."; + + let description = [{ +Read +[the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) +for an explanation of segments. + +Computes a tensor such that +\\(output_i = \frac{\sum_j data_j}{N}\\) where `mean` is +over `j` such that `segment_ids[j] == i` and `N` is the total number of +values summed. + +If the mean is empty for a given segment ID `i`, `output[i] = 0`. + +
+ +For example: + +``` +c = tf.constant([[1.0,2,3,4], [4, 3, 2, 1], [5,6,7,8]]) +tf.segment_mean(c, tf.constant([0, 0, 1])) +# ==> [[2.5, 2.5, 2.5, 2.5], +# [5, 6, 7, 8]] +``` + }]; + + let arguments = (ins + TensorOf<[BF16, F16, F32, F64, I16, I32, I64, I8, TF_Complex128, TF_Complex64, TF_Qint32, TF_Qint8, TF_Quint8, TF_Uint16, TF_Uint32, TF_Uint64, TF_Uint8]>:$data, + TF_I32OrI64Tensor:$segment_ids + ); + + let results = (outs + TensorOf<[BF16, F16, F32, F64, I16, I32, I64, I8, TF_Complex128, TF_Complex64, TF_Qint32, TF_Qint8, TF_Quint8, TF_Uint16, TF_Uint32, TF_Uint64, TF_Uint8]>:$output + ); + + TF_DerivedOperandTypeAttr Tindices = TF_DerivedOperandTypeAttr<1>; + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; +} + +def TF_SegmentMinOp : TF_Op<"SegmentMin", [NoSideEffect]> { + let summary = "Computes the minimum along segments of a tensor."; + + let description = [{ +Read +[the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) +for an explanation of segments. + +Computes a tensor such that +\\(output_i = \min_j(data_j)\\) where `min` is over `j` such +that `segment_ids[j] == i`. + +If the min is empty for a given segment ID `i`, `output[i] = 0`. + +
+ +For example: + +``` +c = tf.constant([[1,2,3,4], [4, 3, 2, 1], [5,6,7,8]]) +tf.segment_min(c, tf.constant([0, 0, 1])) +# ==> [[1, 2, 2, 1], +# [5, 6, 7, 8]] +``` + }]; + + let arguments = (ins + TF_IntOrFpTensor:$data, + TF_I32OrI64Tensor:$segment_ids + ); + + let results = (outs + TF_IntOrFpTensor:$output + ); + + TF_DerivedOperandTypeAttr Tindices = TF_DerivedOperandTypeAttr<1>; + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; +} + +def TF_SegmentProdOp : TF_Op<"SegmentProd", [NoSideEffect]> { + let summary = "Computes the product along segments of a tensor."; + + let description = [{ +Read +[the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) +for an explanation of segments. + +Computes a tensor such that +\\(output_i = \prod_j data_j\\) where the product is over `j` such +that `segment_ids[j] == i`. + +If the product is empty for a given segment ID `i`, `output[i] = 1`. + +
+ +For example: + +``` +c = tf.constant([[1,2,3,4], [4, 3, 2, 1], [5,6,7,8]]) +tf.segment_prod(c, tf.constant([0, 0, 1])) +# ==> [[4, 6, 6, 4], +# [5, 6, 7, 8]] +``` + }]; + + let arguments = (ins + TensorOf<[BF16, F16, F32, F64, I16, I32, I64, I8, TF_Complex128, TF_Complex64, TF_Qint32, TF_Qint8, TF_Quint8, TF_Uint16, TF_Uint32, TF_Uint64, TF_Uint8]>:$data, + TF_I32OrI64Tensor:$segment_ids + ); + + let results = (outs + TensorOf<[BF16, F16, F32, F64, I16, I32, I64, I8, TF_Complex128, TF_Complex64, TF_Qint32, TF_Qint8, TF_Quint8, TF_Uint16, TF_Uint32, TF_Uint64, TF_Uint8]>:$output + ); + + TF_DerivedOperandTypeAttr Tindices = TF_DerivedOperandTypeAttr<1>; + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; +} + +def TF_SegmentSumOp : TF_Op<"SegmentSum", [NoSideEffect]> { + let summary = "Computes the sum along segments of a tensor."; + + let description = [{ +Read +[the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) +for an explanation of segments. + +Computes a tensor such that +\\(output_i = \sum_j data_j\\) where sum is over `j` such +that `segment_ids[j] == i`. + +If the sum is empty for a given segment ID `i`, `output[i] = 0`. + +
+ +For example: + +``` +c = tf.constant([[1,2,3,4], [4, 3, 2, 1], [5,6,7,8]]) +tf.segment_sum(c, tf.constant([0, 0, 1])) +# ==> [[5, 5, 5, 5], +# [5, 6, 7, 8]] +``` + }]; + + let arguments = (ins + TensorOf<[BF16, F16, F32, F64, I16, I32, I64, I8, TF_Complex128, TF_Complex64, TF_Qint32, TF_Qint8, TF_Quint8, TF_Uint16, TF_Uint32, TF_Uint64, TF_Uint8]>:$data, + TF_I32OrI64Tensor:$segment_ids + ); + + let results = (outs + TensorOf<[BF16, F16, F32, F64, I16, I32, I64, I8, TF_Complex128, TF_Complex64, TF_Qint32, TF_Qint8, TF_Quint8, TF_Uint16, TF_Uint32, TF_Uint64, TF_Uint8]>:$output + ); + + TF_DerivedOperandTypeAttr Tindices = TF_DerivedOperandTypeAttr<1>; + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; +} + def TF_SelectOp : TF_Op<"Select", [NoSideEffect]> { let summary = "Selects elements from `x` or `y`, depending on `condition`."; @@ -4799,6 +5276,10 @@ def TF_SelectV2Op : TF_Op<"SelectV2", [NoSideEffect]> { ); TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<1>; + + let builders = [ + OpBuilder<"Builder* builder, OperationState& result, Value condition, Value e, Value t"> + ]; } def TF_ShapeOp : TF_Op<"Shape", [NoSideEffect]> { @@ -4831,7 +5312,7 @@ shape(t) ==> [2, 2, 3] }]; let builders = [ - OpBuilder<"Builder* builder, OperationState& result, Value* input, BoolAttr use32Bit"> + OpBuilder<"Builder* builder, OperationState& result, Value input, BoolAttr use32Bit"> ]; let hasFolder = 1; @@ -5207,6 +5688,36 @@ x = [[[[1, 2, 3, 4], TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } +def TF_SparseSoftmaxCrossEntropyWithLogitsOp : TF_Op<"SparseSoftmaxCrossEntropyWithLogits", [NoSideEffect]> { + let summary = [{ +Computes softmax cross entropy cost and gradients to backpropagate. + }]; + + let description = [{ +Unlike `SoftmaxCrossEntropyWithLogits`, this operation does not accept +a matrix of label probabilities, but rather a single label per row +of features. This label is considered to have probability 1.0 for the +given row. + +Inputs are the logits, not probabilities. + }]; + + let arguments = (ins + TF_FpTensor:$features, + TF_I32OrI64Tensor:$labels + ); + + let results = (outs + TF_FpTensor:$loss, + TF_FpTensor:$backprop + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; + TF_DerivedOperandTypeAttr Tlabels = TF_DerivedOperandTypeAttr<1>; + + let verifier = [{ return Verify(*this); }]; +} + def TF_SparseToDenseOp : TF_Op<"SparseToDense", [NoSideEffect]> { let summary = "Converts a sparse representation into a dense tensor."; @@ -5541,6 +6052,17 @@ receive 0, 0, and 1, respectively. The appropriate bits in `begin_mask` and TF_DerivedOperandTypeAttr Index = TF_DerivedOperandTypeAttr<1>; let verifier = [{ return VerifyStridedSliceBase(*this); }]; + + let extraClassDeclaration = [{ + // If sliced shape is able to be deduced, returns true, updates + // `begin_indices`, `end_indices`, and `strides` with their canonical + // values, respectively. + bool GetSlicedBoundRanges( + ::llvm::ArrayRef shape, + ::llvm::SmallVectorImpl *begin_indices, + ::llvm::SmallVectorImpl *end_indices, + ::llvm::SmallVectorImpl *strides); + }]; } def TF_StridedSliceGradOp : TF_Op<"StridedSliceGrad", [NoSideEffect]> { @@ -5641,8 +6163,8 @@ retained with length 1. 
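Stepping back to the `SparseSoftmaxCrossEntropyWithLogits` op registered above: its semantics are easiest to check through the public endpoint (the raw op additionally returns the `backprop` result described in the ODS):

```python
import tensorflow as tf

logits = tf.constant([[2.0, 0.5, -1.0],
                      [0.1, 0.2, 3.0]])
labels = tf.constant([0, 2])  # one class index per row, not a probability matrix

# Per-row loss: equivalent to -log(softmax(logits)[i, labels[i]]).
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                      logits=logits)
print(loss.numpy())  # two non-negative values, one per row
```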
TF_DerivedOperandTypeAttr Tidx = TF_DerivedOperandTypeAttr<1>; let builders = [OpBuilder< - "Builder *builder, OperationState &result, Value *input, " - "Value *reduction_indices, BoolAttr keep_dims" + "Builder *builder, OperationState &result, Value input, " + "Value reduction_indices, BoolAttr keep_dims" >]; } @@ -5969,6 +6491,103 @@ num_elements: optional. If not -1, the number of elements in the list. }]; } +def TF_TensorScatterUpdateOp : TF_Op<"TensorScatterUpdate", [NoSideEffect]> { + let summary = [{ +Scatter `updates` into an existing tensor according to `indices`. + }]; + + let description = [{ +This operation creates a new tensor by applying sparse `updates` to the passed +in `tensor`. +This operation is very similar to `tf.scatter_nd`, except that the updates are +scattered onto an existing tensor (as opposed to a zero-tensor). If the memory +for the existing tensor cannot be re-used, a copy is made and updated. + +If `indices` contains duplicates, then their updates are accumulated (summed). + +**WARNING**: The order in which updates are applied is nondeterministic, so the +output will be nondeterministic if `indices` contains duplicates -- because +of some numerical approximation issues, numbers summed in different order +may yield different results. + +`indices` is an integer tensor containing indices into a new tensor of shape +`shape`. The last dimension of `indices` can be at most the rank of `shape`: + + indices.shape[-1] <= shape.rank + +The last dimension of `indices` corresponds to indices into elements +(if `indices.shape[-1] = shape.rank`) or slices +(if `indices.shape[-1] < shape.rank`) along dimension `indices.shape[-1]` of +`shape`. `updates` is a tensor with shape + + indices.shape[:-1] + shape[indices.shape[-1]:] + +The simplest form of scatter is to insert individual elements in a tensor by +index. For example, say we want to insert 4 scattered elements in a rank-1 +tensor with 8 elements. + +
+ +In Python, this scatter operation would look like this: + + >>> indices = tf.constant([[4], [3], [1], [7]]) + >>> updates = tf.constant([9, 10, 11, 12]) + >>> tensor = tf.ones([8], dtype=tf.int32) + >>> print(tf.tensor_scatter_nd_update(tensor, indices, updates)) + tf.Tensor([ 1 11 1 10 9 1 1 12], shape=(8,), dtype=int32) + +We can also, insert entire slices of a higher rank tensor all at once. For +example, if we wanted to insert two slices in the first dimension of a +rank-3 tensor with two matrices of new values. + +In Python, this scatter operation would look like this: + + >>> indices = tf.constant([[0], [2]]) + >>> updates = tf.constant([[[5, 5, 5, 5], [6, 6, 6, 6], + ... [7, 7, 7, 7], [8, 8, 8, 8]], + ... [[5, 5, 5, 5], [6, 6, 6, 6], + ... [7, 7, 7, 7], [8, 8, 8, 8]]]) + >>> tensor = tf.ones([4, 4, 4], dtype=tf.int32) + >>> print(tf.tensor_scatter_nd_update(tensor, indices, updates).numpy()) + [[[5 5 5 5] + [6 6 6 6] + [7 7 7 7] + [8 8 8 8]] + [[1 1 1 1] + [1 1 1 1] + [1 1 1 1] + [1 1 1 1]] + [[5 5 5 5] + [6 6 6 6] + [7 7 7 7] + [8 8 8 8]] + [[1 1 1 1] + [1 1 1 1] + [1 1 1 1] + [1 1 1 1]]] + +Note that on CPU, if an out of bound index is found, an error is returned. +On GPU, if an out of bound index is found, the index is ignored. + }]; + + let arguments = (ins + TF_Tensor:$tensor, + TF_I32OrI64Tensor:$indices, + TF_Tensor:$updates + ); + + let results = (outs + TF_Tensor:$output + ); + + TF_DerivedOperandTypeAttr Tindices = TF_DerivedOperandTypeAttr<1>; + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; + + let verifier = [{ return Verify(*this); }]; +} + def TF_TileOp : TF_Op<"Tile", [NoSideEffect]> { let summary = "Constructs a tensor by tiling a given tensor."; @@ -6075,7 +6694,7 @@ The output `y` has the same rank as `x`. The shapes of `x` and `y` satisfy: let builders = [ OpBuilder< - "Builder* builder, OperationState& result, Value* x, Value* perm"> + "Builder* builder, OperationState& result, Value x, Value perm"> ]; let verifier = [{ @@ -6119,7 +6738,7 @@ def TF_UniqueOp : TF_Op<"Unique", [NoSideEffect]> { let description = [{ This operation returns a tensor `y` containing all of the unique elements of `x` sorted in the same order that they occur in `x`; `x` does not need to be sorted. -This operation also returns a tensor `idx` the same size as `x` that contains +This operation also returns a tensor `idx` the same size as `x` that contains the index of each value of `x` in the unique output `y`. In other words: `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]` diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td index c3a51613357..5505b8980e3 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td @@ -171,6 +171,8 @@ def TF_FpOrI32OrI64Tensor : TensorOf<[AnyFloat, TF_I32Or64]>; // Any integer or floating-point tensor types def TF_IntOrFpTensor : TensorOf<[TF_Int, AnyFloat]>; +def TF_SintOrFpTensor : TensorOf<[TF_SInt, AnyFloat]>; + def TF_FpOrComplexTensor : TensorOf<[AnyFloat, TF_AnyComplex]>; def TF_AnyNumber : AnyTypeOf<[TF_Int, AnyFloat, TF_AnyQuantized, TF_AnyComplex], @@ -297,10 +299,10 @@ def TF_IntTypeAttr : TypeAttrBase<"IntegerType", "integer type"> { // behavior. The result type has the same element type as both operands. 
class WithBroadcastableBinOpBuilder {
  list<OpBuilder> builders = [OpBuilder<
-"Builder *builder, OperationState &result, Value* x, Value* y",
+"Builder *builder, OperationState &result, Value x, Value y",
 [{
   auto resultType =
-      OpTrait::util::getBroadcastedType(x->getType(), y->getType());
+      OpTrait::util::getBroadcastedType(x.getType(), y.getType());
   if (!resultType)
     mlir::emitError(result.location, "non-broadcastable operands");
   return build(builder, result, resultType, x, y);
@@ -312,17 +314,17 @@ class WithBroadcastableBinOpBuilder {
 // behavior. The result type has bool element type.
 class WithBroadcastableCmpOpBuilder {
   list<OpBuilder> builders = [OpBuilder<
-"Builder *builder, OperationState &result, Value* x, Value* y",
+"Builder *builder, OperationState &result, Value x, Value y",
 [{
   Type resultType;
-  if (x->getType().isa<UnrankedTensorType>() ||
-      y->getType().isa<UnrankedTensorType>()) {
+  if (x.getType().isa<UnrankedTensorType>() ||
+      y.getType().isa<UnrankedTensorType>()) {
     resultType = UnrankedTensorType::get(builder->getI1Type());
   } else {
     SmallVector<int64_t, 4> resultShape;
     if (!OpTrait::util::getBroadcastedShape(
-            x->getType().cast<ShapedType>().getShape(),
-            y->getType().cast<ShapedType>().getShape(), resultShape)) {
+            x.getType().cast<ShapedType>().getShape(),
+            y.getType().cast<ShapedType>().getShape(), resultShape)) {
       mlir::emitError(result.location,
                       "operands have no broadcastable shapes");
     }

diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc
index 3744cdeb66e..9b07b2f0c92 100644
--- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc
+++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc
@@ -21,9 +21,12 @@ limitations under the License.

 #include
 #include
 #include
+#include

 #include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Sequence.h"
 #include "llvm/ADT/SmallVector.h"
@@ -32,27 +35,28 @@
#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Support/FormatVariadic.h" -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/Dialect/Traits.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Diagnostics.h" // TF:local_config_mlir -#include "mlir/IR/DialectImplementation.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Matchers.h" // TF:local_config_mlir -#include "mlir/IR/OpImplementation.h" // TF:local_config_mlir -#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/TypeUtilities.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir -#include "mlir/IR/Value.h" // TF:local_config_mlir -#include "mlir/Parser.h" // TF:local_config_mlir -#include "mlir/Support/LLVM.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir -#include "mlir/Support/STLExtras.h" // TF:local_config_mlir -#include "mlir/Transforms/InliningUtils.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/Dialect/Traits.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Diagnostics.h" // TF:llvm-project +#include "mlir/IR/DialectImplementation.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Matchers.h" // TF:llvm-project +#include "mlir/IR/OpDefinition.h" // TF:llvm-project +#include "mlir/IR/OpImplementation.h" // TF:llvm-project +#include "mlir/IR/PatternMatch.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/TypeUtilities.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project +#include "mlir/IR/Value.h" // TF:llvm-project +#include "mlir/Parser.h" // TF:llvm-project +#include "mlir/Support/LLVM.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project +#include "mlir/Support/STLExtras.h" // TF:llvm-project +#include "mlir/Transforms/InliningUtils.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/util/tensor_format.h" @@ -68,17 +72,17 @@ namespace TF { // may have non-static shape because the shape is not propagated during constant // folding. If the defining op for the given operand is a constant op, this // routine uses the constant op's attribute to get the actual shape. -static RankedTensorType GetRankedTensorTypeForOperand(Value *operand) { +static RankedTensorType GetRankedTensorTypeForOperand(Value operand) { DenseElementsAttr attr; if (matchPattern(operand, m_Constant(&attr))) { return attr.getType().dyn_cast(); } - return operand->getType().dyn_cast(); + return operand.getType().dyn_cast(); } // Returns true if the given `value` is of ranked float tensor type with the // given `rank`. 
-static inline bool isOfRankedFloatTensorType(Value *value, int rank) {
+static inline bool isOfRankedFloatTensorType(Value value, int rank) {
   RankedTensorType type = GetRankedTensorTypeForOperand(value);
   return type && type.getRank() == rank &&
          type.getElementType().isa<FloatType>();
@@ -86,21 +90,21 @@ static inline bool isOfRankedFloatTensorType(Value *value, int rank) {

 // Returns true if the given `value` has the specified rank or has unranked
 // type.
-static inline bool IsOfRankOrUnranked(Value *value, int64_t rank) {
+static inline bool IsOfRankOrUnranked(Value value, int64_t rank) {
   RankedTensorType type = GetRankedTensorTypeForOperand(value);
   return !type || type.getRank() == rank;
 }

 // Returns true if the given `value` has at least the specified rank or has
 // unranked type.
-static inline bool HasRankAtLeast(Value *value, int64_t rank) {
+static inline bool HasRankAtLeast(Value value, int64_t rank) {
   RankedTensorType type = GetRankedTensorTypeForOperand(value);
   return !type || type.getRank() >= rank;
 }

 // Returns true if the given `value` has at most the specified rank or has
 // unranked type.
-static inline bool HasRankAtMost(Value *value, int64_t rank) {
+static inline bool HasRankAtMost(Value value, int64_t rank) {
   RankedTensorType type = GetRankedTensorTypeForOperand(value);
   return !type || type.getRank() <= rank;
 }
@@ -154,10 +158,10 @@ static bool IsUnknownDimOrRank(int64_t dim_or_rank) {
 // Returns the tf.Equal/tf.NotEqual result type given inputs `x` and `y`. If
 // `incompatible_shape_error` is true, reports an error if `x` and `y` have
 // incompatible shapes. Otherwise, returns a tensor type with unknown rank.
-static Type DeduceEqualCmpOpType(Builder *builder, Location loc, Value *x,
-                                 Value *y, BoolAttr incompatible_shape_error) {
+static Type DeduceEqualCmpOpType(Builder *builder, Location loc, Value x,
+                                 Value y, BoolAttr incompatible_shape_error) {
   auto result_type =
-      OpTrait::util::getBroadcastedType(x->getType(), y->getType());
+      OpTrait::util::getBroadcastedType(x.getType(), y.getType());
   if (!result_type) {
     if (incompatible_shape_error.getValue()) {
       mlir::emitError(loc, "non-broadcastable operands");
@@ -181,9 +185,9 @@ static int64_t GetDimForAxis(int64_t axis, int64_t rank) {
 // Infers output type for reduction ops such as SumOp, MaxOp etc.
 // TODO(b/e667204a): Move this logic to shape inference once it supports custom
 // inference functions.
-static Type InferReductionOpType(Value *input, Value *reduction_indices,
+static Type InferReductionOpType(Value input, Value reduction_indices,
                                  BoolAttr keep_dims, Builder *builder) {
-  Type input_ty = input->getType();
+  Type input_ty = input.getType();
   Type element_ty = getElementTypeOrSelf(input_ty);

   // Output type is unranked if input type is not ranked.
@@ -324,14 +328,14 @@ void AddV2Op::getCanonicalizationPatterns(OwningRewritePatternList &results,
 //===----------------------------------------------------------------------===//
 // Verifies a reduction op's `input` and reduction `dims`.
-static LogicalResult VerifyReductionInputAndDims(Value *input, Value *dims,
+static LogicalResult VerifyReductionInputAndDims(Value input, Value dims,
                                                  Location loc) {
-  auto dims_type = dims->getType().dyn_cast<RankedTensorType>();
+  auto dims_type = dims.getType().dyn_cast<RankedTensorType>();
   if (!dims_type) return success();
   if (dims_type.getRank() > 1)
     return emitError(loc, "dimensions can only be 0D or 1D tensor");

-  auto input_type = input->getType().dyn_cast<RankedTensorType>();
+  auto input_type = input.getType().dyn_cast<RankedTensorType>();
   if (!input_type) return success();

   int64_t rank = input_type.getRank();
@@ -437,9 +441,8 @@ static LogicalResult Verify(BiasAddOp op) {
   if (!IsOfRankOrUnranked(op.bias(), 1))
     return op.emitOpError("requires bias operand to have rank exactly one");

-  RankedTensorType value_ty =
-      op.value()->getType().dyn_cast<RankedTensorType>();
-  RankedTensorType bias_ty = op.bias()->getType().dyn_cast<RankedTensorType>();
+  RankedTensorType value_ty = op.value().getType().dyn_cast<RankedTensorType>();
+  RankedTensorType bias_ty = op.bias().getType().dyn_cast<RankedTensorType>();
   if (!bias_ty || !value_ty) return success();

   // TODO(hinsu): Leverage tensor_format.h utility in TensorFlow to compute
@@ -524,7 +527,7 @@ static LogicalResult Verify(OpT op) {
   Operation::operand_range values = op.values();

   int axis_idx = std::is_same<OpT, ConcatOp>() ? 0 : 1;
-  Value *axis = *op.getODSOperands(axis_idx).begin();
+  Value axis = *op.getODSOperands(axis_idx).begin();
   if (!HasRankAtMost(axis, 1)) {
     return op.emitOpError(
         "requires axis to be of scalar type (or vector type for older "
@@ -535,6 +538,118 @@ static LogicalResult Verify(OpT op) {
                                   /*mask_one_dim=*/true, op.getOperation());
 }

+//===----------------------------------------------------------------------===//
+// ConcatOffsetOp
+//===----------------------------------------------------------------------===//
+
+static LogicalResult Verify(ConcatOffsetOp op) {
+  if (op.N() < 2)
+    return op.emitOpError() << "requires N to be at least 2, got " << op.N();
+
+  if (op.shape().size() != op.offset().size())
+    return op.emitOpError()
+           << "requires sizes of shapes and offsets to be the same, got sizes "
+           << op.shape().size() << " and " << op.offset().size();
+
+  auto ranked_dim = op.concat_dim().getType().dyn_cast<RankedTensorType>();
+  if (ranked_dim && ranked_dim.getRank() != 0)
+    return op.emitOpError()
+           << "requires concat_dim to be a scalar, got tensor of rank "
+           << ranked_dim.getRank();
+
+  int64_t num_dims = -1;
+  for (auto shape_offset_idx :
+       llvm::enumerate(llvm::zip(op.shape(), op.offset()))) {
+    Value shape = std::get<0>(shape_offset_idx.value());
+    Value offset = std::get<1>(shape_offset_idx.value());
+    const size_t idx = shape_offset_idx.index();
+
+    if (failed(verifyCompatibleShape(shape.getType(), offset.getType())))
+      return op.emitOpError() << "requires operand and result " << idx
+                              << " to have compatible shapes";
+
+    auto ranked_shape = shape.getType().dyn_cast<RankedTensorType>();
+    if (!ranked_shape) continue;
+
+    if (ranked_shape.getRank() != 1)
+      return op.emitOpError() << "requires shape tensor operand " << idx
+                              << " to be of rank 1, got tensor of rank "
+                              << ranked_shape.getRank();
+
+    if (!ranked_shape.hasStaticShape()) continue;
+
+    int64_t ranked_shape_dim = ranked_shape.getDimSize(0);
+    if (num_dims == -1)
+      num_dims = ranked_shape_dim;
+    else if (ranked_shape_dim != num_dims)
+      return op.emitOpError()
+             << "requires shape tensor (rank 1) operand " << idx
+             << " to be of length " << num_dims
+             << ", got tensor (rank 1) of length " << ranked_shape_dim;
+  }
+
+  return success();
+}
+
+LogicalResult ConcatOffsetOp::fold(ArrayRef<Attribute> operands,
+                                   SmallVectorImpl<OpFoldResult> &results) {
+  // ConcatOffset must have its first operand be concat_dim and at least two
+  // shape tensors in variadic shapes operand.
+  if (operands.size() < 3) return failure();
+
+  // Check concat_dim is a scalar.
+  auto concat_dim_attr = operands[0].dyn_cast_or_null<DenseIntElementsAttr>();
+  if (!concat_dim_attr || concat_dim_attr.getType().getRank() != 0)
+    return failure();
+
+  llvm::SmallVector<DenseIntElementsAttr, 4> shapes;
+  shapes.reserve(operands.size() - 1);
+  for (Attribute shape : llvm::drop_begin(operands, 1))
+    if (auto shape_attr = shape.dyn_cast_or_null<DenseIntElementsAttr>())
+      shapes.push_back(shape_attr);
+    else
+      return failure();
+
+  // Check all shapes are vectors of the same length.
+  if (shapes.front().getType().getRank() != 1) return failure();
+  const int64_t num_dims = shapes.front().getNumElements();
+  for (DenseIntElementsAttr shape : llvm::drop_begin(shapes, 1))
+    if (shape.getType().getRank() != 1 || shape.getNumElements() != num_dims)
+      return failure();
+
+  // Check concat_dim is within [-num_dims, num_dims).
+  int32_t concat_dim = (*concat_dim_attr.getValues<int32_t>().begin());
+  if (concat_dim < 0) concat_dim += num_dims;
+  if (concat_dim >= num_dims || concat_dim < 0) return failure();
+
+  // Check all elements besides at concat_dim match across all shape tensors.
+  SmallVector<int32_t, 4> shape0;
+  shape0.reserve(num_dims);
+  for (int32_t dim : shapes.front().getValues<int32_t>()) shape0.push_back(dim);
+
+  for (DenseIntElementsAttr shape : llvm::drop_begin(shapes, 1)) {
+    for (auto dims_and_idx : llvm::enumerate(llvm::zip(shape0, shape))) {
+      if (dims_and_idx.index() == concat_dim) continue;
+
+      if (std::get<0>(dims_and_idx.value()) !=
+          std::get<1>(dims_and_idx.value()).getSExtValue())
+        return failure();
+    }
+  }
+
+  // Compute an exclusive cumulative sum of elements at concat_dim.
+  results.reserve(shapes.size());
+  SmallVector<int32_t, 4> cumulative_sum(num_dims, 0);
+  RankedTensorType offset_type =
+      RankedTensorType::get({num_dims}, IntegerType::get(32, getContext()));
+  for (DenseIntElementsAttr shape : shapes) {
+    results.push_back(DenseIntElementsAttr::get(offset_type, cumulative_sum));
+    cumulative_sum[concat_dim] += shape.getValue<int32_t>(concat_dim);
+  }
+
+  return success();
+}
+
 //===----------------------------------------------------------------------===//
 // ConjOp
 //===----------------------------------------------------------------------===//
@@ -670,7 +785,7 @@ static LogicalResult Verify(OpT op) {
   }

   int64_t input_channels = -1;
-  if (auto ty = op.input()->getType().template dyn_cast<RankedTensorType>()) {
+  if (auto ty = op.input().getType().template dyn_cast<RankedTensorType>()) {
     std::string data_format = op.data_format().str();
     tensorflow::TensorFormat format;
     auto is_valid = FormatFromString(data_format, &format);
@@ -680,7 +795,7 @@
   }

   int64_t filter_channels = -1;
-  if (auto ty = op.filter()->getType().template dyn_cast<RankedTensorType>()) {
+  if (auto ty = op.filter().getType().template dyn_cast<RankedTensorType>()) {
     int idx = tensorflow::GetFilterTensorInputChannelsDimIndex(
         num_dims, tensorflow::FORMAT_HWIO);
     filter_channels = ty.getDimSize(idx);
@@ -726,6 +841,101 @@ void DivOp::getCanonicalizationPatterns(OwningRewritePatternList &results,
   results.insert<DivWithSqrtDivisor>(context);
 }

+//===----------------------------------------------------------------------===//
+// DynamicStitchOp
+//===----------------------------------------------------------------------===//
+
+static LogicalResult Verify(DynamicStitchOp op) {
+  if (op.N() < 1) return op.emitOpError("requires attribute N with value >= 1");
+
+  if (RankedTensorType out_ty = op.getType().dyn_cast<RankedTensorType>()) {
+    if (out_ty.getRank() == 0) {
+      return op.emitOpError("requires non scalar output");
op.emitOpError("requires non scalar output"); + } + } + + llvm::SmallDenseSet index_values; + bool all_indices_const = true; + int32_t max_index = -1; + llvm::Optional> inferred_item_shape; + for (auto it : llvm::zip(op.indices(), op.data())) { + Value index = std::get<0>(it); + + DenseIntElementsAttr index_attr; + if (matchPattern(index, m_Constant(&index_attr))) { + for (int32_t index : index_attr.getValues()) { + if (index < 0) + return op.emitOpError() + << "requires non-negative index values; found " << index; + max_index = std::max(index, max_index); + index_values.insert(index); + } + } else { + all_indices_const = false; + } + + Value data = std::get<1>(it); + RankedTensorType index_ty = index.getType().dyn_cast(); + RankedTensorType data_ty = data.getType().dyn_cast(); + if (!index_ty || !data_ty) continue; + + int64_t index_rank = index_ty.getRank(); + ArrayRef data_shape = data_ty.getShape(); + ArrayRef index_shape = index_ty.getShape(); + if (failed(mlir::verifyCompatibleShape(index_shape, + data_shape.take_front(index_rank)))) + return op.emitOpError() << "requires shape of data with type " << data_ty + << " to have prefix matching with shape of the " + "corresponding index type " + << index_ty; + + ArrayRef item_shape = data_shape.drop_front(index_rank); + if (!inferred_item_shape) { + inferred_item_shape = llvm::to_vector<4>(item_shape); + continue; + } + + if (failed(mlir::verifyCompatibleShape(item_shape, *inferred_item_shape))) + return op.emitOpError() << "has inconsistent shaped data and index " + "pairs; inferred item shapes [" + << llvm::makeArrayRef(*inferred_item_shape) + << "] and [" << item_shape << "] don't match"; + for (int i = 0, e = item_shape.size(); i < e; ++i) { + int64_t &inferred_dim = (*inferred_item_shape)[i]; + int64_t dim = item_shape[i]; + if (ShapedType::isDynamic(inferred_dim)) inferred_dim = dim; + } + } + + // If all indices are constants, then verify that they cover all indices in + // the range [0, max_index] and the output type is legal. 
+ if (all_indices_const) { + for (int32_t i = 0; i <= max_index; i++) { + if (!index_values.count(i)) + return op.emitOpError() << "missing index " << i; + } + + if (inferred_item_shape) { + SmallVector expected_shape; + expected_shape.push_back(max_index + 1); + expected_shape.append(inferred_item_shape->begin(), + inferred_item_shape->end()); + + auto out_ty = op.getType().cast(); + auto expected_out_ty = + RankedTensorType::get(expected_shape, out_ty.getElementType()); + + if (!AreCastCompatible(out_ty, expected_out_ty)) { + return op.emitOpError() << "has invalid output type; should be " + "compatible with inferred type " + << expected_out_ty; + } + } + } + + return success(); +} + //===----------------------------------------------------------------------===// // EinsumOp //===----------------------------------------------------------------------===// @@ -770,13 +980,44 @@ static LogicalResult Verify(EqualOp op) { op.getOperation()); } -void EqualOp::build(Builder *builder, OperationState &result, Value *x, - Value *y, BoolAttr incompatible_shape_error) { +void EqualOp::build(Builder *builder, OperationState &result, Value x, Value y, + BoolAttr incompatible_shape_error) { auto result_type = DeduceEqualCmpOpType(builder, result.location, x, y, incompatible_shape_error); return build(builder, result, result_type, x, y, incompatible_shape_error); } +//===----------------------------------------------------------------------===// +// ExpandDimsOp +//===----------------------------------------------------------------------===// + +Type InferExpandDimsOpType(Value input, Value dim) { + Type element_ty = input.getType().cast().getElementType(); + auto unranked_ty = UnrankedTensorType::get(element_ty); + + auto input_ty = input.getType().dyn_cast(); + if (!input_ty) return unranked_ty; + + DenseIntElementsAttr dim_attr; + if (!matchPattern(dim, m_Constant(&dim_attr)) || + dim_attr.getNumElements() != 1) + return unranked_ty; + int64_t dim_val = (*dim_attr.begin()).getSExtValue(); + int64_t input_rank = input_ty.getRank(); + + if (dim_val < -input_rank - 1 || dim_val > input_rank + 1) return unranked_ty; + if (dim_val < 0) dim_val += input_rank + 1; + + SmallVector shape = llvm::to_vector<4>(input_ty.getShape()); + shape.insert(shape.begin() + dim_val, 1); + return RankedTensorType::get(shape, element_ty); +} + +void ExpandDimsOp::build(Builder *builder, OperationState &result, Value input, + Value dim) { + return build(builder, result, InferExpandDimsOpType(input, dim), input, dim); +} + //===----------------------------------------------------------------------===// // FakeQuantWithMinMaxArgsOp //===----------------------------------------------------------------------===// @@ -832,16 +1073,16 @@ static LogicalResult Verify(FakeQuantWithMinMaxVarsPerChannelOp op) { if (!isOfRankedFloatTensorType(op.max(), 1)) return op.emitOpError("requires max to be a 1d float tensor"); - Value *inputs = op.inputs(); + Value inputs = op.inputs(); if (!HasRankAtLeast(inputs, 1) || - inputs->getType().isa()) { + inputs.getType().isa()) { return op.emitError("requires inputs to be at least 1d float tensor"); } - auto inputsType = inputs->getType().cast(); + auto inputsType = inputs.getType().cast(); int depth = inputsType.getDimSize(inputsType.getRank() - 1); - if (op.min()->getType().cast().getDimSize(0) != depth || - op.max()->getType().cast().getDimSize(0) != depth) { + if (op.min().getType().cast().getDimSize(0) != depth || + op.max().getType().cast().getDimSize(0) != depth) { return op.emitOpError( 
"requires min and max to have same size as last dimension of inputs"); } @@ -897,7 +1138,7 @@ static LogicalResult Verify(FusedBatchNormOp op) { static LogicalResult Verify(GatherV2Op op) { int64_t batch_dims = op.batch_dims().getSExtValue(); - if (auto ty = op.indices()->getType().dyn_cast()) { + if (auto ty = op.indices().getType().dyn_cast()) { int64_t rank = ty.getRank(); if (batch_dims > rank || batch_dims < -rank) return op.emitOpError() @@ -912,7 +1153,7 @@ static LogicalResult Verify(GatherV2Op op) { DenseIntElementsAttr axis_attr; if (matchPattern(op.axis(), m_Constant(&axis_attr))) { int64_t axis = (*axis_attr.begin()).getSExtValue(); - if (auto ty = op.params()->getType().dyn_cast()) { + if (auto ty = op.params().getType().dyn_cast()) { int64_t rank = ty.getRank(); if (axis >= rank || axis < -rank) return op.emitOpError() << "axis (" << axis << ") must be in range [" @@ -955,7 +1196,7 @@ static LogicalResult Verify(IfOp op) { " inputs"); for (unsigned i = 0; i < expectedNumInputs; ++i) { - auto operandType = op.getOperand(i + 1)->getType().cast(); + auto operandType = op.getOperand(i + 1).getType().cast(); auto thenInputType = thenFuncType.getInput(i).cast(); if (!AreCastCompatible(operandType, thenInputType)) return op.emitError( @@ -986,7 +1227,7 @@ static LogicalResult Verify(IfOp op) { " results"); for (unsigned i = 0; i < expectedNumResults; ++i) { - auto resultType = op.getResult(i)->getType().cast(); + auto resultType = op.getResult(i).getType().cast(); auto thenResultType = thenFuncType.getResult(i).cast(); if (!AreCastCompatible(thenResultType, resultType)) return op.emitError( @@ -1062,8 +1303,8 @@ void LogicalNotOp::getCanonicalizationPatterns( // MaxOp //===----------------------------------------------------------------------===// -void MaxOp::build(Builder *builder, OperationState &result, Value *input, - Value *reduction_indices, BoolAttr keep_dims) { +void MaxOp::build(Builder *builder, OperationState &result, Value input, + Value reduction_indices, BoolAttr keep_dims) { Type out_ty = InferReductionOpType(input, reduction_indices, keep_dims, builder); build(builder, result, out_ty, input, reduction_indices, keep_dims); @@ -1108,8 +1349,8 @@ static LogicalResult Verify(NotEqualOp op) { op.getOperation()); } -void NotEqualOp::build(Builder *builder, OperationState &result, Value *x, - Value *y, BoolAttr incompatible_shape_error) { +void NotEqualOp::build(Builder *builder, OperationState &result, Value x, + Value y, BoolAttr incompatible_shape_error) { auto result_type = DeduceEqualCmpOpType(builder, result.location, x, y, incompatible_shape_error); return build(builder, result, result_type, x, y, incompatible_shape_error); @@ -1122,7 +1363,7 @@ void NotEqualOp::build(Builder *builder, OperationState &result, Value *x, static LogicalResult Verify(OneHotOp op) { int64_t axis = op.axis().getSExtValue(); - auto indices_ty = op.indices()->getType().dyn_cast(); + auto indices_ty = op.indices().getType().dyn_cast(); if (indices_ty && !(axis == -1 || (axis >= 0 && axis <= indices_ty.getShape().size()))) { return op.emitOpError() @@ -1147,9 +1388,8 @@ static LogicalResult Verify(OneHotOp op) { DenseIntElementsAttr depth_attr; if (matchPattern(op.depth(), m_Constant(&depth_attr))) { - if (depth_attr.getType().getRank() != 0) { + if (depth_attr.getType().getRank() != 0) return op.emitOpError() << "requires depth to be a scalar"; - } int64_t depth = depth_attr.getValue({}).getSExtValue(); if (depth < 0) { return op.emitOpError() << "depth must be non-negative, got: " << depth; 
@@ -1159,6 +1399,36 @@ static LogicalResult Verify(OneHotOp op) { return success(); } +static TensorType InferOneHotOpType(Value indices, Value depth, Value on_value, + Value off_value, IntegerAttr axis) { + int64_t axis_val = axis.getInt(); + Type element_ty = on_value.getType().cast().getElementType(); + auto unranked_ty = UnrankedTensorType::get(element_ty); + if (axis_val < -1) return unranked_ty; + + auto indices_ty = indices.getType().dyn_cast(); + if (!indices_ty) return unranked_ty; + + auto shape = llvm::to_vector<2>(indices_ty.getShape()); + if (axis_val == -1) axis_val = shape.size(); + + int64_t depth_val = ShapedType::kDynamicSize; + DenseIntElementsAttr depth_attr; + if (matchPattern(depth, m_Constant(&depth_attr)) && + depth_attr.getNumElements() == 1) + depth_val = (*depth_attr.begin()).getSExtValue(); + shape.insert(shape.begin() + axis_val, depth_val); + return RankedTensorType::get(shape, element_ty); +} + +void OneHotOp::build(Builder *builder, OperationState &result, Value indices, + Value depth, Value on_value, Value off_value, + IntegerAttr axis) { + build(builder, result, + InferOneHotOpType(indices, depth, on_value, off_value, axis), indices, + depth, on_value, off_value, axis); +} + //===----------------------------------------------------------------------===// // PackOp //===----------------------------------------------------------------------===// @@ -1174,8 +1444,8 @@ static LogicalResult Verify(PackOp op) { } int64_t inputs_rank = -1; - for (Value *value : values) { - if (auto ty = value->getType().dyn_cast()) { + for (Value value : values) { + if (auto ty = value.getType().dyn_cast()) { // Exit early as input types are verified to be compatible so all ranked // tensors have the same rank. inputs_rank = ty.getRank(); @@ -1199,6 +1469,59 @@ static LogicalResult Verify(PackOp op) { return success(); } +//===----------------------------------------------------------------------===// +// ParseExampleV2Op +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(ParseExampleV2Op op) { + // NOTE(mrry): This validates properties of an op that would previously be + // validated by the TensorFlow OpDef type checker. In addition to these + // checks, the shape inference function for ParseExampleV2 validates the + // consistency of the argument and result types. + + // Validate dense variadic input and output lengths. + // NOTE(mrry): The Tdense attr is derived from dense_defaults, so we + // do not need to validate dense_defaults. + auto dense_types_count = + std::distance(op.Tdense().begin(), op.Tdense().end()); + auto dense_values_count = + std::distance(op.dense_values().begin(), op.dense_values().end()); + if (dense_values_count != dense_types_count) { + return op.emitError() << "output 'dense_values' should have same length " + << "as attribute 'Tdense'"; + } + + // Validate sparse variadic output lengths. + // NOTE(mrry): The sparse_types attr is derived from sparse_values, so we + // do not need to validate sparse_values. 
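+  // For example, a ParseExampleV2 op with num_sparse = 2 must carry two
+  // entries in sparse_types and produce exactly two sparse_indices, two
+  // sparse_values, and two sparse_shapes results.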
+ auto sparse_types_count = + std::distance(op.sparse_types().begin(), op.sparse_types().end()); + if (op.num_sparse() != sparse_types_count) { + return op.emitError() << "attribute 'num_sparse' should be the same as " + << "the length of attribute 'sparse_types'"; + } + if (op.sparse_indices().size() != sparse_types_count) { + return op.emitError() << "output 'sparse_indices' should have same length " + << "as attribute 'sparse_types'"; + } + if (op.sparse_shapes().size() != sparse_types_count) { + return op.emitError() << "output 'sparse_shapes' should have same length " + << "as attribute 'sparse_types'"; + } + + // Validate ragged variadic output lengths. + auto ragged_value_types_count = std::distance(op.ragged_value_types().begin(), + op.ragged_value_types().end()); + auto ragged_split_types_count = std::distance(op.ragged_split_types().begin(), + op.ragged_split_types().end()); + if (ragged_value_types_count != ragged_split_types_count) { + return op.emitError() << "attribute 'ragged_value_types' should have same " + << "length as attribute 'ragged_split_types'"; + } + + return success(); +} + //===----------------------------------------------------------------------===// // ReciprocalOp //===----------------------------------------------------------------------===// @@ -1222,10 +1545,10 @@ static LogicalResult Verify(RandomUniformOp op) { // RangeOp //===----------------------------------------------------------------------===// -void RangeOp::build(Builder *builder, OperationState &result, Value *start, - Value *limit, Value *delta) { - assert(start->getType() == limit->getType()); - assert(start->getType() == delta->getType()); +void RangeOp::build(Builder *builder, OperationState &result, Value start, + Value limit, Value delta) { + assert(start.getType() == limit.getType()); + assert(start.getType() == delta.getType()); DenseIntElementsAttr start_val; DenseIntElementsAttr limit_val; DenseIntElementsAttr delta_val; @@ -1239,20 +1562,20 @@ void RangeOp::build(Builder *builder, OperationState &result, Value *start, builder, result, RankedTensorType::get( size.getSExtValue(), - start->getType().cast().getElementType()), + start.getType().cast().getElementType()), start, limit, delta); } return RangeOp::build( builder, result, RankedTensorType::get( - {-1}, start->getType().cast().getElementType()), + {-1}, start.getType().cast().getElementType()), start, limit, delta); } //===----------------------------------------------------------------------===// // RankOp //===----------------------------------------------------------------------===// -void RankOp::build(Builder *builder, OperationState &result, Value *input) { +void RankOp::build(Builder *builder, OperationState &result, Value input) { return RankOp::build(builder, result, RankedTensorType::get({}, builder->getIntegerType(32)), input); @@ -1274,17 +1597,17 @@ void RealDivOp::getCanonicalizationPatterns(OwningRewritePatternList &results, // TODO(b/128020684): Verify the rank of the output and change to use // m_Constant. static LogicalResult Verify(ReshapeOp op) { - auto shapeType = op.shape()->getType().cast(); + auto shapeType = op.shape().getType().cast(); if (!shapeType.hasRank()) return success(); if (shapeType.getRank() != 1) return op.emitOpError("shape must be 1D tensor"); auto rankByShape = shapeType.getShape()[0]; - auto typeOfTensor = op.tensor()->getType().cast(); + auto typeOfTensor = op.tensor().getType().cast(); // No compile time verification for unknown sized shape. 
 if (rankByShape == -1 || !typeOfTensor.hasStaticShape()) return success();
   // Check values if constant shape. No compile time verification for
   // non-constant shape.
-  auto *shapeOp = op.shape()->getDefiningOp();
+  auto *shapeOp = op.shape().getDefiningOp();
   if (!shapeOp) return success();
   Attribute shapeCst;
   if (auto shapeStdOp = dyn_cast<ConstantOp>(shapeOp)) {
@@ -1336,9 +1659,9 @@ static LogicalResult Verify(ReshapeOp op) {
   return success();
 }

-void ReshapeOp::build(Builder *builder, OperationState &result, Value *tensor,
-                      Value *shape) {
-  auto ttype = tensor->getType().cast<ShapedType>();
+void ReshapeOp::build(Builder *builder, OperationState &result, Value tensor,
+                      Value shape) {
+  auto ttype = tensor.getType().cast<ShapedType>();
   auto etype = ttype.getElementType();

   auto unranked = [builder, etype, &result, shape, tensor]() {
@@ -1394,6 +1717,37 @@ void ReshapeOp::build(Builder *builder, OperationState &result, Value *tensor,
   return unranked();
 }

+//===----------------------------------------------------------------------===//
+// SelectV2Op
+//===----------------------------------------------------------------------===//
+
+static Type InferSelectV2OpType(Value condition, Value e, Value t) {
+  Type element_ty = e.getType().cast<TensorType>().getElementType();
+  auto unranked_ty = UnrankedTensorType::get(element_ty);
+
+  Type broadcasted_ty =
+      OpTrait::util::getBroadcastedType(e.getType(), t.getType());
+  if (!broadcasted_ty) return unranked_ty;
+
+  auto cond_ranked_ty = condition.getType().dyn_cast<RankedTensorType>();
+  auto broadcasted_ranked_ty = broadcasted_ty.dyn_cast<RankedTensorType>();
+  if (!cond_ranked_ty || !broadcasted_ranked_ty) return unranked_ty;
+
+  // Explicitly get the broadcasted output type as the element type of
+  // condition may not be the same as the broadcasted type's element type.
+  SmallVector<int64_t, 4> result_shape;
+  if (!OpTrait::util::getBroadcastedShape(cond_ranked_ty.getShape(),
+                                          broadcasted_ranked_ty.getShape(),
+                                          result_shape))
+    return unranked_ty;
+  return RankedTensorType::get(result_shape, element_ty);
+}
+
+void SelectV2Op::build(Builder *builder, OperationState &result,
+                       Value condition, Value e, Value t) {
+  build(builder, result, InferSelectV2OpType(condition, e, t), condition, e, t);
+}
+
 //===----------------------------------------------------------------------===//
 // ShapeOp
 //===----------------------------------------------------------------------===//
@@ -1436,7 +1790,7 @@ LogicalResult VerifyShapeOperandAndResult(Operation *op, Type operand_type,
 }  // anonymous namespace

 static LogicalResult Verify(ShapeOp op) {
-  return VerifyShapeOperandAndResult(op, op.input()->getType(), op.getType());
+  return VerifyShapeOperandAndResult(op, op.input().getType(), op.getType());
 }

 // Converts shape of the given type to attribute if it is of ranked tensor type.
@@ -1461,12 +1815,12 @@ static Attribute ConvertShapeToAttr(Type input_ty, int out_width) {

 OpFoldResult ShapeOp::fold(ArrayRef<Attribute> operands) {
   int width =
       getType().cast<ShapedType>().getElementType().getIntOrFloatBitWidth();
-  return ConvertShapeToAttr(getOperand()->getType(), width);
+  return ConvertShapeToAttr(getOperand().getType(), width);
 }

-void ShapeOp::build(Builder *builder, OperationState &result, Value *input,
+void ShapeOp::build(Builder *builder, OperationState &result, Value input,
                     BoolAttr use32Bit) {
-  auto rankedTensorType = input->getType().dyn_cast<RankedTensorType>();
+  auto rankedTensorType = input.getType().dyn_cast<RankedTensorType>();
   int64_t rank = rankedTensorType ? rankedTensorType.getRank() : -1;
   auto out_type = use32Bit.getValue() ?
builder->getIntegerType(32) : builder->getIntegerType(64); @@ -1491,7 +1845,7 @@ static LogicalResult Verify(ShapeNOp op) { for (auto i : llvm::seq(0, num_tensors)) { auto verification = VerifyShapeOperandAndResult( - op, op.getOperand(i)->getType(), op.getResult(i)->getType(), i); + op, op.getOperand(i).getType(), op.getResult(i).getType(), i); if (failed(verification)) return verification; } @@ -1564,7 +1918,7 @@ static LogicalResult Verify(SliceOp op) { " same number of elements"; } - auto input_ty = op.input()->getType().dyn_cast(); + auto input_ty = op.input().getType().dyn_cast(); if (input_ty && begin_ty.getNumElements() != input_ty.getRank()) { return op.emitOpError() << "requires number of elements in begin and size" "are equal to input rank"; @@ -1618,7 +1972,7 @@ static LogicalResult Verify(SoftmaxOp op) { // static LogicalResult Verify(SoftmaxCrossEntropyWithLogitsOp op) { auto broadcasted_ty = OpTrait::util::getBroadcastedType( - op.features()->getType(), op.labels()->getType()) + op.features().getType(), op.labels().getType()) .dyn_cast_or_null(); if (!broadcasted_ty || (broadcasted_ty.hasRank() && broadcasted_ty.getRank() != 2)) @@ -1628,6 +1982,31 @@ static LogicalResult Verify(SoftmaxCrossEntropyWithLogitsOp op) { return success(); } +//===----------------------------------------------------------------------===// +// SparseSoftmaxCrossEntropyWithLogitsOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(SparseSoftmaxCrossEntropyWithLogitsOp op) { + if (!IsOfRankOrUnranked(op.features(), 2)) { + return op.emitOpError("requires features operand of rank two"); + } + if (!IsOfRankOrUnranked(op.labels(), 1)) { + return op.emitOpError("requires labels operand of rank one"); + } + auto features_ty = op.features().getType().dyn_cast(); + auto labels_ty = op.labels().getType().dyn_cast(); + if (features_ty && labels_ty) { + int64_t features_batches = features_ty.getDimSize(0); + int64_t labels_batches = labels_ty.getDimSize(0); + if (!ShapedType::isDynamic(features_batches) && + !ShapedType::isDynamic(labels_batches) && + features_batches != labels_batches) + return op.emitOpError( + "requires features and labels with matching first dimension"); + } + return success(); +} + //===----------------------------------------------------------------------===// // SplitOp //===----------------------------------------------------------------------===// @@ -1639,8 +2018,8 @@ template LogicalResult VerifySplitInputAndSplitDim(Op op, Optional *dim_index) { *dim_index = llvm::None; - Value *split_dim = op.split_dim(); - if (auto split_dim_type = split_dim->getType().dyn_cast()) + Value split_dim = op.split_dim(); + if (auto split_dim_type = split_dim.getType().dyn_cast()) if (split_dim_type.getRank() != 0) return op.emitOpError( "split dimension should be an integer scalar tensor"); @@ -1648,7 +2027,7 @@ LogicalResult VerifySplitInputAndSplitDim(Op op, Optional *dim_index) { // We can perform further verification if the input tensor to be split has // known rank and the split dimension tensor is a constant. 
- auto input_type = op.value()->getType().template dyn_cast(); + auto input_type = op.value().getType().template dyn_cast(); if (!input_type) return success(); int64_t input_rank = input_type.getRank(); @@ -1677,7 +2056,7 @@ static LogicalResult Verify(SplitOp op) { if (!dim_index) return success(); int64_t input_dim_size = - op.value()->getType().cast().getDimSize(*dim_index); + op.value().getType().cast().getDimSize(*dim_index); if (input_dim_size == ShapedType::kDynamicSize) return success(); if (input_dim_size % op.getNumResults() != 0) @@ -1693,7 +2072,7 @@ static LogicalResult Verify(SplitOp op) { static LogicalResult Verify(SplitVOp op) { auto split_sizes_type = - op.size_splits()->getType().dyn_cast(); + op.size_splits().getType().dyn_cast(); if (!split_sizes_type) return success(); if (split_sizes_type.getRank() != 1 || @@ -1706,7 +2085,7 @@ static LogicalResult Verify(SplitVOp op) { if (!dim_index) return success(); int64_t input_dim_size = - op.value()->getType().cast().getDimSize(*dim_index); + op.value().getType().cast().getDimSize(*dim_index); if (input_dim_size == ShapedType::kDynamicSize) return success(); // If split sizes come from a constant, they must sum to the dimension size @@ -1773,8 +2152,8 @@ void SubOp::getCanonicalizationPatterns(OwningRewritePatternList &results, // SumOp //===----------------------------------------------------------------------===// -void SumOp::build(Builder *builder, OperationState &result, Value *input, - Value *reduction_indices, BoolAttr keep_dims) { +void SumOp::build(Builder *builder, OperationState &result, Value input, + Value reduction_indices, BoolAttr keep_dims) { Type out_ty = InferReductionOpType(input, reduction_indices, keep_dims, builder); build(builder, result, out_ty, input, reduction_indices, keep_dims); @@ -1797,8 +2176,8 @@ static LogicalResult VerifyStridedSliceBase(OpTy op) { // Expected size for operands begin, end and strides vector operands. int64_t expected_size = -1; - for (Value *val : {op.begin(), op.end(), op.strides()}) { - auto operand_ty = val->getType().dyn_cast(); + for (Value val : {op.begin(), op.end(), op.strides()}) { + auto operand_ty = val.getType().dyn_cast(); if (!operand_ty || !operand_ty.hasStaticShape()) { // TensorFlow constant ops may have non-static shape because the shape is // not propagated during constant folding. If the defining op for this @@ -1912,12 +2291,51 @@ static void CalculateSlicedShapeAndBoundRanges( } } +bool StridedSliceOp::GetSlicedBoundRanges( + ArrayRef shape, SmallVectorImpl *begin_indices, + SmallVectorImpl *end_indices, SmallVectorImpl *strides) { + if (this->ellipsis_mask().getZExtValue() || + this->new_axis_mask().getZExtValue() || + this->shrink_axis_mask().getZExtValue()) + return false; // TODO(antiagainst): support these masks + + // TODO(hinsu): Support lowering for ops with dynamic begin and end values + // when it is possible to derive indices based on mask attributes. 
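+  // For instance (illustrative values): slicing a shape [4, 4] input with
+  // constant begin = [1, 0], end = [3, 4], and strides = [1, 2] fills the
+  // output vectors with begin_indices = [1, 0], end_indices = [3, 4], and
+  // strides = [1, 2] before the mask-aware normalization below.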
+ DenseIntElementsAttr begin_indices_attr, end_indices_attr, strides_attr; + if (!matchPattern(this->begin(), m_Constant(&begin_indices_attr)) || + !matchPattern(this->end(), m_Constant(&end_indices_attr)) || + !matchPattern(this->strides(), m_Constant(&strides_attr))) + return false; + + auto input_shape = llvm::to_vector<4>(shape); + int rank = input_shape.size(); + + begin_indices->clear(); + begin_indices->reserve(rank); + end_indices->clear(); + end_indices->reserve(rank); + strides->clear(); + strides->reserve(rank); + + for (const APInt &index : begin_indices_attr) + begin_indices->push_back(index.getSExtValue()); + for (const APInt &index : end_indices_attr) + end_indices->push_back(index.getSExtValue()); + for (const APInt &stride : strides_attr) + strides->push_back(stride.getSExtValue()); + + CalculateSlicedShapeAndBoundRanges( + input_shape, this->begin_mask().getZExtValue(), + this->end_mask().getZExtValue(), *begin_indices, *end_indices, *strides); + return true; +} + //===----------------------------------------------------------------------===// // StridedSliceGradOp //===----------------------------------------------------------------------===// static LogicalResult Verify(StridedSliceGradOp op) { - auto shape_type = op.shape()->getType().dyn_cast(); + auto shape_type = op.shape().getType().dyn_cast(); if (shape_type && shape_type.getRank() != 1) return op.emitOpError("'shape' operand must be 1D tensor, but got ") << shape_type.getRank() << "D tensor"; @@ -1999,6 +2417,35 @@ static LogicalResult Verify(TensorListStackOp op) { return success(); } +//===----------------------------------------------------------------------===// +// TensorScatterUpdateOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(TensorScatterUpdateOp op) { + if (!HasRankAtLeast(op.tensor(), 1)) + return op.emitOpError( + "requires tensor operand to have at least 1 dimension"); + if (!HasRankAtLeast(op.indices(), 1)) + return op.emitOpError( + "requires indices operand to have at least 1 dimension"); + if (!HasRankAtLeast(op.updates(), 1)) + return op.emitOpError( + "requires updates operand to have at least 1 dimension"); + + auto tensor_ty = op.tensor().getType().dyn_cast(); + auto indices_ty = op.indices().getType().dyn_cast(); + if (!tensor_ty || !indices_ty) return success(); + + int64_t num_index_dims = indices_ty.getShape().back(); + if (ShapedType::isDynamic(num_index_dims)) return success(); + + if (num_index_dims > tensor_ty.getRank()) + return op.emitOpError( + "requires tensor operand with rank greater than or equal to the " + "indices operand's last dimensions"); + return success(); +} + //===----------------------------------------------------------------------===// // TopKV2Op //===----------------------------------------------------------------------===// @@ -2028,9 +2475,9 @@ static LogicalResult Verify(TransposeOp op) { } // TODO(jpienaar): perm could be optional too. -void TransposeOp::build(Builder *builder, OperationState &result, Value *x, - Value *perm) { - auto x_type = x->getType().cast(); +void TransposeOp::build(Builder *builder, OperationState &result, Value x, + Value perm) { + auto x_type = x.getType().cast(); // If value is unranked, then so is results. 
if (!x_type.hasRank()) return TransposeOp::build(builder, result, @@ -2061,7 +2508,7 @@ void TransposeOp::build(Builder *builder, OperationState &result, Value *x, } OpFoldResult TransposeOp::fold(ArrayRef operands) { - auto const_perm = dyn_cast_or_null(perm()->getDefiningOp()); + auto const_perm = dyn_cast_or_null(perm().getDefiningOp()); if (!const_perm) { return {}; @@ -2093,7 +2540,7 @@ void TruncateDivOp::getCanonicalizationPatterns( //===----------------------------------------------------------------------===// static LogicalResult Verify(UnpackOp op) { - auto value_type = op.value()->getType().dyn_cast(); + auto value_type = op.value().getType().dyn_cast(); if (!value_type) return success(); int64_t value_rank = value_type.getRank(); @@ -2121,9 +2568,9 @@ static LogicalResult VerifyUnsortedSegmentReduction(Op op) { if (!HasRankAtMost(op.num_segments(), 0)) return op.emitOpError("number of segments should be a 0-D tensor"); - auto data_type = op.data()->getType().template dyn_cast(); + auto data_type = op.data().getType().template dyn_cast(); auto segment_ids_type = - op.segment_ids()->getType().template dyn_cast(); + op.segment_ids().getType().template dyn_cast(); if (data_type && segment_ids_type) { if (data_type.getRank() < segment_ids_type.getRank()) return op.emitOpError( @@ -2161,7 +2608,7 @@ static LogicalResult VerifyUnsortedSegmentReduction(Op op) { static LogicalResult Verify(VariableShapeOp op) { auto resource_operand_type = op.input() - ->getType() + .getType() .cast() .getElementType() .cast(); @@ -2312,10 +2759,10 @@ struct TFInlinerInterface : public DialectInlinerInterface { // operation that takes 'input' as the only operand, and produces a single // result of 'resultType'. If a conversion can not be generated, nullptr // should be returned. - Operation *materializeCallConversion(OpBuilder &builder, Value *input, + Operation *materializeCallConversion(OpBuilder &builder, Value input, Type result_type, Location conversion_loc) const final { - if (!result_type.isa() || !input->getType().isa()) + if (!result_type.isa() || !input.getType().isa()) return nullptr; return builder.create(conversion_loc, result_type, input, /*truncate=*/builder.getBoolAttr(false)); diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h index e9aaed56afc..b6f1f76782f 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h @@ -19,16 +19,16 @@ limitations under the License. 
#ifndef TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_TF_OPS_H_
 #define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_TF_OPS_H_

-#include "mlir/Analysis/CallInterfaces.h"  // TF:local_config_mlir
-#include "mlir/Dialect/Traits.h"  // TF:local_config_mlir
-#include "mlir/IR/Attributes.h"  // TF:local_config_mlir
-#include "mlir/IR/Builders.h"  // TF:local_config_mlir
-#include "mlir/IR/Dialect.h"  // TF:local_config_mlir
-#include "mlir/IR/Matchers.h"  // TF:local_config_mlir
-#include "mlir/IR/Module.h"  // TF:local_config_mlir
-#include "mlir/IR/OpImplementation.h"  // TF:local_config_mlir
-#include "mlir/IR/StandardTypes.h"  // TF:local_config_mlir
-#include "mlir/IR/TypeUtilities.h"  // TF:local_config_mlir
+#include "mlir/Analysis/CallInterfaces.h"  // TF:llvm-project
+#include "mlir/Dialect/Traits.h"  // TF:llvm-project
+#include "mlir/IR/Attributes.h"  // TF:llvm-project
+#include "mlir/IR/Builders.h"  // TF:llvm-project
+#include "mlir/IR/Dialect.h"  // TF:llvm-project
+#include "mlir/IR/Matchers.h"  // TF:llvm-project
+#include "mlir/IR/Module.h"  // TF:llvm-project
+#include "mlir/IR/OpImplementation.h"  // TF:llvm-project
+#include "mlir/IR/StandardTypes.h"  // TF:llvm-project
+#include "mlir/IR/TypeUtilities.h"  // TF:llvm-project
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h"
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h"

diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td
index 9b6196cda5b..8444ec783f0 100644
--- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td
+++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td
@@ -29,6 +29,7 @@ limitations under the License.

 include "tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td"
 include "mlir/Analysis/CallInterfaces.td"
+include "mlir/IR/OpBase.td"

 class TF_TensorListInitOp<string mnemonic> : TF_Op<mnemonic, [NoSideEffect]> {
   let results = (outs
@@ -56,7 +57,7 @@ class TF_TensorListInitOp<string mnemonic> : TF_Op<mnemonic, [NoSideEffect]> {
     // Returns data type of the result handle. Returned type contains type of
     // the TensorList element as a subtype.
     VariantType handle_dtype() {
-      return getElementTypeOrSelf(handle()->getType()).cast<VariantType>();
+      return getElementTypeOrSelf(handle().getType()).cast<VariantType>();
     }
   }];
 }
@@ -232,6 +233,50 @@ def TF_LegacyCallOp : TF_Op<"LegacyCall",
   }];
 }

+def TF_ParseExampleV2Op : TF_Op<"ParseExampleV2",
+                                [NoSideEffect,
+                                 AttrSizedResultSegments]> {
+
+  let summary =
+    "Transforms a vector of tf.Example protos (as strings) into typed tensors.";
+
+  let arguments = (ins
+    TF_StrTensor:$serialized,
+    TF_StrTensor:$names,
+    TF_StrTensor:$sparse_keys,
+    TF_StrTensor:$dense_keys,
+    TF_StrTensor:$ragged_keys,
+    Variadic<TensorOf<[F32, I64, TF_Str]>>:$dense_defaults,
+
+    Confined<I64Attr, [IntMinValue<0>]>:$num_sparse,
+    I32ElementsAttr:$result_segment_sizes
+  );
+
+  let results = (outs
+    Variadic<I64Tensor>:$sparse_indices,                    // len(sparse_types)
+    Variadic<TensorOf<[F32, I64, TF_Str]>>:$sparse_values,  // len(sparse_types)
+    Variadic<I64Tensor>:$sparse_shapes,                     // len(sparse_types)
+    Variadic<TensorOf<[F32, I64, TF_Str]>>:$dense_values,   // len(Tdense)
+    Variadic<TensorOf<[F32, I64, TF_Str]>>:$ragged_values,  // len(ragged_value_types)
+                                                            //   = len(ragged_split_types)
+    Variadic<TensorOf<[I32, I64]>>:$ragged_row_splits       // len(ragged_split_types)
+                                                            //   = len(ragged_value_types)
+  );
+
+  // The Verify(ParseExampleV2Op) function validates that the lengths and types
+  // of these attrs are compatible.
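+  // For example, an instance of this op with three sparse features has
+  // num_sparse = 3, derives three entries in sparse_types, and must produce
+  // three sparse_indices, sparse_values, and sparse_shapes results.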
+ TF_DerivedOperandTypeListAttr Tdense = TF_DerivedOperandTypeListAttr<5>; + TF_DerivedResultTypeListAttr sparse_types = TF_DerivedResultTypeListAttr<1>; + TF_DerivedResultTypeListAttr ragged_value_types = + TF_DerivedResultTypeListAttr<4>; + TF_DerivedResultTypeListAttr ragged_split_types = + TF_DerivedResultTypeListAttr<5>; + + let verifier = [{ + return Verify(*this); + }]; +} + def TF_PartitionedCallOp : TF_Op<"PartitionedCall", [CallOpInterface, NoSideEffect]> { let summary = diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc index c672d624944..17cc4cdfbe5 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc @@ -22,16 +22,16 @@ limitations under the License. #include "llvm/ADT/Twine.h" #include "llvm/Support/Casting.h" #include "llvm/Support/raw_ostream.h" -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/Identifier.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/IR/OpImplementation.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/SymbolTable.h" // TF:local_config_mlir -#include "mlir/IR/TypeUtilities.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/Identifier.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/IR/OpImplementation.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/SymbolTable.h" // TF:llvm-project +#include "mlir/IR/TypeUtilities.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project namespace mlir { namespace tf_saved_model { diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.h index 9998858356d..6f4b2061628 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.h +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.h @@ -16,10 +16,10 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_TF_SAVED_MODEL_H_ #define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_TF_SAVED_MODEL_H_ -#include "mlir/IR/Dialect.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/IR/OpDefinition.h" // TF:local_config_mlir +#include "mlir/IR/Dialect.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/IR/OpDefinition.h" // TF:llvm-project namespace mlir { namespace tf_saved_model { diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h index c600f1445c5..51315c4f90c 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h @@ -18,10 +18,10 @@ limitations under the License. 
#ifndef TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_TF_TRAITS_H_ #define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_TF_TRAITS_H_ -#include "mlir/IR/OpDefinition.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/TypeUtilities.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir +#include "mlir/IR/OpDefinition.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/TypeUtilities.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" namespace mlir { @@ -47,7 +47,7 @@ class OperandsSameAsResultsTypeOrRef LogicalResult shapeMatch = impl::verifySameOperandsAndResultShape(op); if (failed(shapeMatch)) return shapeMatch; - auto type = getElementTypeOrSelf(op->getResult(0)->getType()); + auto type = getElementTypeOrSelf(op->getResult(0).getType()); // Verify that the first result type is same as the rest of the results. // We skip the comparison against itself. diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_types.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_types.cc index ff43728928a..539605d6ccc 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_types.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_types.cc @@ -16,8 +16,8 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" #include "llvm/Support/ErrorHandling.h" -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/TypeUtilities.h" // TF:local_config_mlir +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/TypeUtilities.h" // TF:llvm-project namespace mlir { namespace TF { diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_types.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_types.h index 6c97253ef33..7ff54e0c7f4 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_types.h +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_types.h @@ -18,10 +18,10 @@ limitations under the License. 
#ifndef TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_TF_TYPES_H_
 #define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_TF_TYPES_H_

-#include "mlir/IR/Diagnostics.h"  // TF:local_config_mlir
-#include "mlir/IR/Location.h"  // TF:local_config_mlir
-#include "mlir/IR/StandardTypes.h"  // TF:local_config_mlir
-#include "mlir/IR/Types.h"  // TF:local_config_mlir
+#include "mlir/IR/Diagnostics.h"  // TF:llvm-project
+#include "mlir/IR/Location.h"  // TF:llvm-project
+#include "mlir/IR/StandardTypes.h"  // TF:llvm-project
+#include "mlir/IR/Types.h"  // TF:llvm-project

 namespace mlir {
 namespace TF {

diff --git a/tensorflow/compiler/mlir/tensorflow/tests/BUILD b/tensorflow/compiler/mlir/tensorflow/tests/BUILD
index ef93af93b40..a4ebc997991 100644
--- a/tensorflow/compiler/mlir/tensorflow/tests/BUILD
+++ b/tensorflow/compiler/mlir/tensorflow/tests/BUILD
@@ -4,7 +4,7 @@ package(licenses = ["notice"])

 glob_lit_tests(
     data = [":test_utilities"],
-    driver = "@local_config_mlir//:run_lit.sh",
+    driver = "@llvm-project//mlir:run_lit.sh",
     test_file_exts = ["mlir"],
 )

@@ -14,7 +14,7 @@ filegroup(
     testonly = True,
     data = [
         "//tensorflow/compiler/mlir:tf-opt",
-        "@llvm//:FileCheck",
-        "@llvm//:not",
+        "@llvm-project//llvm:FileCheck",
+        "@llvm-project//llvm:not",
     ],
 )

diff --git a/tensorflow/compiler/mlir/tensorflow/tests/constant-fold.mlir b/tensorflow/compiler/mlir/tensorflow/tests/constant-fold.mlir
index b7d438b38ed..2a17ec16898 100644
--- a/tensorflow/compiler/mlir/tensorflow/tests/constant-fold.mlir
+++ b/tensorflow/compiler/mlir/tensorflow/tests/constant-fold.mlir
@@ -67,6 +67,116 @@ func @testAdd() -> tensor<2x2xi32> {
   return %2: tensor<2x2xi32>
 }

+// CHECK-LABEL: testSimpleConcatOffset
+func @testSimpleConcatOffset() -> (tensor<3xi32>, tensor<3xi32>, tensor<3xi32>) {
+  %concat_dim = constant dense<1> : tensor<i32>
+  %shape0 = constant dense<[2, 2, 7]> : tensor<3xi32>
+  %shape1 = constant dense<[2, 3, 7]> : tensor<3xi32>
+  %shape2 = constant dense<[2, 5, 7]> : tensor<3xi32>
+
+  // CHECK: [[OFFSET_0:%.*]] = "tf.Const{{.*}} dense<0> : tensor<3xi32>
+  // CHECK: [[OFFSET_1:%.*]] = "tf.Const{{.*}} dense<[0, 2, 0]> : tensor<3xi32>
+  // CHECK: [[OFFSET_2:%.*]] = "tf.Const{{.*}} dense<[0, 5, 0]> : tensor<3xi32>
+
+  %offset:3 = "tf.ConcatOffset"(%concat_dim, %shape0, %shape1, %shape2) : (tensor<i32>, tensor<3xi32>, tensor<3xi32>, tensor<3xi32>) -> (tensor<3xi32>, tensor<3xi32>, tensor<3xi32>)
+
+  // CHECK: return [[OFFSET_0]], [[OFFSET_1]], [[OFFSET_2]]
+  return %offset#0, %offset#1, %offset#2: tensor<3xi32>, tensor<3xi32>, tensor<3xi32>
+}
+
+// CHECK-LABEL: testConcatOffsetWithZeros
+func @testConcatOffsetWithZeros() -> (tensor<3xi32>, tensor<3xi32>, tensor<3xi32>, tensor<3xi32>) {
+  %concat_dim = constant dense<1> : tensor<i32>
+  %shape0 = constant dense<0> : tensor<3xi32>
+  %shape1 = constant dense<[0, 3, 0]> : tensor<3xi32>
+  %shape2 = constant dense<[0, 5, 0]> : tensor<3xi32>
+  %shape3 = constant dense<0> : tensor<3xi32>
+
+  // CHECK: [[OFFSET_0:%.*]] = "tf.Const{{.*}} dense<0> : tensor<3xi32>
+  // CHECK: [[OFFSET_2:%.*]] = "tf.Const{{.*}} dense<[0, 3, 0]> : tensor<3xi32>
+  // CHECK: [[OFFSET_3:%.*]] = "tf.Const{{.*}} dense<[0, 8, 0]> : tensor<3xi32>
+
+  %offset:4 = "tf.ConcatOffset"(%concat_dim, %shape0, %shape1, %shape2, %shape3) : (tensor<i32>, tensor<3xi32>, tensor<3xi32>, tensor<3xi32>, tensor<3xi32>) -> (tensor<3xi32>, tensor<3xi32>, tensor<3xi32>, tensor<3xi32>)
+
+  // CHECK: return [[OFFSET_0]], [[OFFSET_0]], [[OFFSET_2]], [[OFFSET_3]]
+  return %offset#0, %offset#1, %offset#2, %offset#3: tensor<3xi32>, tensor<3xi32>, tensor<3xi32>, tensor<3xi32>
tensor<3xi32> +} + +// CHECK-LABEL: testConcatOffsetNegativeConcatDim +func @testConcatOffsetNegativeConcatDim() -> (tensor<3xi32>, tensor<3xi32>, tensor<3xi32>) { + %concat_dim = constant dense<-1> : tensor + %shape0 = constant dense<[2, 8, 3]> : tensor<3xi32> + %shape1 = constant dense<[2, 8, 5]> : tensor<3xi32> + %shape2 = constant dense<[2, 8, 7]> : tensor<3xi32> + + // CHECK: [[OFFSET_0:%.*]] = "tf.Const{{.*}} dense<0> : tensor<3xi32> + // CHECK: [[OFFSET_1:%.*]] = "tf.Const{{.*}} dense<[0, 0, 3]> : tensor<3xi32> + // CHECK: [[OFFSET_2:%.*]] = "tf.Const{{.*}} dense<[0, 0, 8]> : tensor<3xi32> + + %offset:3 = "tf.ConcatOffset"(%concat_dim, %shape0, %shape1, %shape2) : (tensor, tensor<3xi32>, tensor<3xi32>, tensor<3xi32>) -> (tensor<3xi32>, tensor<3xi32>, tensor<3xi32>) + + // CHECK: return [[OFFSET_0]], [[OFFSET_1]], [[OFFSET_2]] + return %offset#0, %offset#1, %offset#2: tensor<3xi32>, tensor<3xi32>, tensor<3xi32> +} + +// CHECK-LABEL: testConcatOffsetNonConstConcatDim +func @testConcatOffsetNonConstConcatDim(%concat_dim: tensor) -> (tensor<3xi32>, tensor<3xi32>) { + %shape0 = constant dense<[2, 2, 7]> : tensor<3xi32> + %shape1 = constant dense<[2, 3, 7]> : tensor<3xi32> + + // CHECK: tf.ConcatOffset + %offset:2 = "tf.ConcatOffset"(%concat_dim, %shape0, %shape1) : (tensor, tensor<3xi32>, tensor<3xi32>) -> (tensor<3xi32>, tensor<3xi32>) + + return %offset#0, %offset#1: tensor<3xi32>, tensor<3xi32> +} + +// CHECK-LABEL: testConcatOffsetNonConstShape +func @testConcatOffsetNonConstShape(%shape1: tensor<3xi32>) -> (tensor<3xi32>, tensor<3xi32>) { + %concat_dim = constant dense<1> : tensor + %shape0 = constant dense<[2, 2, 7]> : tensor<3xi32> + + // CHECK: tf.ConcatOffset + %offset:2 = "tf.ConcatOffset"(%concat_dim, %shape0, %shape1) : (tensor, tensor<3xi32>, tensor<3xi32>) -> (tensor<3xi32>, tensor<3xi32>) + + return %offset#0, %offset#1: tensor<3xi32>, tensor<3xi32> +} + +// CHECK-LABEL: testConcatOffsetBadNegativeConcatDim +func @testConcatOffsetBadNegativeConcatDim() -> (tensor<3xi32>, tensor<3xi32>) { + %concat_dim = constant dense<-4> : tensor + %shape0 = constant dense<[2, 2, 7]> : tensor<3xi32> + %shape1 = constant dense<[2, 3, 7]> : tensor<3xi32> + + // CHECK: tf.ConcatOffset + %offset:2 = "tf.ConcatOffset"(%concat_dim, %shape0, %shape1) : (tensor, tensor<3xi32>, tensor<3xi32>) -> (tensor<3xi32>, tensor<3xi32>) + + return %offset#0, %offset#1: tensor<3xi32>, tensor<3xi32> +} + +// CHECK-LABEL: testConcatOffsetBadPositiveConcatDim +func @testConcatOffsetBadPositiveConcatDim() -> (tensor<3xi32>, tensor<3xi32>) { + %concat_dim = constant dense<3> : tensor + %shape0 = constant dense<[2, 2, 7]> : tensor<3xi32> + %shape1 = constant dense<[2, 3, 7]> : tensor<3xi32> + + // CHECK: tf.ConcatOffset + %offset:2 = "tf.ConcatOffset"(%concat_dim, %shape0, %shape1) : (tensor, tensor<3xi32>, tensor<3xi32>) -> (tensor<3xi32>, tensor<3xi32>) + + return %offset#0, %offset#1: tensor<3xi32>, tensor<3xi32> +} + +// CHECK-LABEL: testConcatOffsetDifferentNonConcatDimElements +func @testConcatOffsetDifferentNonConcatDimElements() -> (tensor<3xi32>, tensor<3xi32>) { + %concat_dim = constant dense<1> : tensor + %shape0 = constant dense<[2, 2, 7]> : tensor<3xi32> + %shape1 = constant dense<[2, 3, 8]> : tensor<3xi32> + + // CHECK: tf.ConcatOffset + %offset:2 = "tf.ConcatOffset"(%concat_dim, %shape0, %shape1) : (tensor, tensor<3xi32>, tensor<3xi32>) -> (tensor<3xi32>, tensor<3xi32>) + + return %offset#0, %offset#1: tensor<3xi32>, tensor<3xi32> +} + // Ops with side effects should not get constant folded. 
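// (Editorial note, not part of the original test file: a sketch of the rule
// exercised below, assuming the folder evaluates candidate ops eagerly
// through TFE, as the comment further down about TFE_Op suggests. A stateful
// op may yield a different value on each evaluation, so a fold like
//   %r = "tf.SomeStatefulOp"()            // hypothetical op name
//   ==> %r = "tf.Const"() {value = ...}   // invalid fold
// would silently change program semantics and must be rejected; only the
// pure ops around it may fold.)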
// CHECK-LABEL: func @testSideEffectOp() -> tensor<3xf32> func @testSideEffectOp() -> tensor<3xf32> { @@ -77,7 +187,7 @@ func @testSideEffectOp() -> tensor<3xf32> { return %1: tensor<3xf32> } -// Ops with unimplemnted attributes which couldn't be added to the TFE_Op. +// Ops with unimplemented attributes which couldn't be added to the TFE_Op. // CHECK-LABEL: func @testUnimplementedOp() -> (tensor, tensor) func @testUnimplementedOp() -> (tensor, tensor) { %0 = constant dense<1> : tensor diff --git a/tensorflow/compiler/mlir/tensorflow/tests/executor_to_control_dialect.mlir b/tensorflow/compiler/mlir/tensorflow/tests/executor_to_control_dialect.mlir index 60117552c8e..5ecef050055 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/executor_to_control_dialect.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/executor_to_control_dialect.mlir @@ -121,7 +121,7 @@ func @ref_tf_executor_ops(%arg0: tensor<4x!tf.f32ref>, %arg1: tensor<4x!tf.f32re // ----- -// Tests if empty island with just control dependency inputs and output is +// Tests if empty island with just one control dependency input and output is // handled correctly. // CHECK-LABEL: func @empty_island_control_dep_only func @empty_island_control_dep_only() -> tensor { @@ -138,10 +138,10 @@ func @empty_island_control_dep_only() -> tensor { } // CHECK-NEXT: %[[CONST2:[0-9]*]]:2 = "_tf.Const"() // CHECK-SAME: () -> (tensor, !_tf.control) - %2 = tf_executor.island(%0#1, %1#1) { + %2 = tf_executor.island(%0#1) { tf_executor.yield } - %3:2 = tf_executor.island(%2) { + %3:2 = tf_executor.island(%2, %1#1) { %6 = "tf.Add"(%0#0, %1#0) : (tensor, tensor) -> tensor tf_executor.yield %6 : tensor } @@ -151,3 +151,38 @@ func @empty_island_control_dep_only() -> tensor { } return %fetch : tensor } + +// ----- + +// Tests if empty island with multiple control inputs will be replaced with a +// no-op. 
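+// (Editorial sketch derived from the CHECK lines below, not part of the
+// original test: an island that only merges control tokens,
+//   %2 = tf_executor.island(%c0, %c1) { tf_executor.yield }
+// has more than one control operand, so instead of being dropped it lowers
+// to a single control-only op in the control dialect,
+//   %noop = "_tf.NoOp"(%c0, %c1) : (!_tf.control, !_tf.control) -> !_tf.control
+// and downstream ops then take %noop as their one control input. The names
+// %c0, %c1, and %noop are illustrative.)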
+// CHECK-LABEL: func @empty_island_multi_control_inputs +func @empty_island_multi_control_inputs() -> tensor { + %fetch = tf_executor.graph { + %0:2 = tf_executor.island { + %4 = "tf.Const"() {device = "", dtype = "tfdtype$DT_INT32", name = "Const", value = dense<1> : tensor} : () -> tensor + tf_executor.yield %4 : tensor + } + // CHECK-NEXT: %[[CONST1:[0-9]*]]:2 = "_tf.Const"() + // CHECK-SAME: () -> (tensor, !_tf.control) + %1:2 = tf_executor.island { + %5 = "tf.Const"() {device = "", dtype = "tfdtype$DT_INT32", name = "Const", value = dense<1> : tensor} : () -> tensor + tf_executor.yield %5 : tensor + } + // CHECK-NEXT: %[[CONST2:[0-9]*]]:2 = "_tf.Const"() + // CHECK-SAME: () -> (tensor, !_tf.control) + %2 = tf_executor.island(%0#1, %1#1) { + tf_executor.yield + } + // CHECK-NEXT: %[[NOOP:[0-9]*]] = "_tf.NoOp"(%[[CONST1]]#1, %[[CONST2]]#1) + // CHECK-SAME: (!_tf.control, !_tf.control) -> !_tf.control + %3:2 = tf_executor.island(%2) { + %6 = "tf.Add"(%0#0, %1#0) : (tensor, tensor) -> tensor + tf_executor.yield %6 : tensor + } + // CHECK-NEXT: %[[ADD:[0-9]*]]:2 = "_tf.Add"(%[[CONST1]]#0, %[[CONST2]]#0, %[[NOOP]]) + // CHECK-SAME: (tensor, tensor, !_tf.control) -> (tensor, !_tf.control) + tf_executor.fetch %3#0 : tensor + } + return %fetch : tensor +} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graph_pruning.mlir b/tensorflow/compiler/mlir/tensorflow/tests/graph_pruning.mlir index 771ad5e30d8..8585790564b 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graph_pruning.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/graph_pruning.mlir @@ -167,16 +167,3 @@ func @control_fetch(%arg0 : i32) { } return } - -// Check that @main function is pruned. -// CHECK-LABEL: func @main -func @main() { - tf_executor.graph { - // CHECK-NOT: tf_executor.island - %0 = tf_executor.island { - tf_executor.yield - } - tf_executor.fetch - } - return -} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graph_pruning_skip_main.mlir b/tensorflow/compiler/mlir/tensorflow/tests/graph_pruning_skip_main.mlir deleted file mode 100644 index 86568cccd0f..00000000000 --- a/tensorflow/compiler/mlir/tensorflow/tests/graph_pruning_skip_main.mlir +++ /dev/null @@ -1,14 +0,0 @@ -// RUN: tf-opt %s -tf-executor-graph-pruning=skip-main-func | FileCheck %s --dump-input=fail - -// Check that @main function is skipped by default. 
-// CHECK-LABEL: func @main -func @main() { - tf_executor.graph { - // CHECKT: tf_executor.island - %0 = tf_executor.island { - tf_executor.yield - } - tf_executor.fetch - } - return -} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/BUILD b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/BUILD index 6c4d6d2b2ab..5880245cc2d 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/BUILD +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/BUILD @@ -4,7 +4,7 @@ licenses(["notice"]) glob_lit_tests( data = [":test_utilities"], - driver = "@local_config_mlir//:run_lit.sh", + driver = "@llvm-project//mlir:run_lit.sh", test_file_exts = ["pbtxt"], ) @@ -14,7 +14,7 @@ filegroup( testonly = True, data = [ "//tensorflow/compiler/mlir:tf-mlir-translate", - "@llvm//:FileCheck", - "@llvm//:not", + "@llvm-project//llvm:FileCheck", + "@llvm-project//llvm:not", ], ) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/functional-if-ops.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/functional-if-ops.pbtxt index cbfa973fd64..8eca30802ef 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/functional-if-ops.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/functional-if-ops.pbtxt @@ -1,11 +1,11 @@ -# RUN: tf-mlir-translate -graphdef-to-mlir %s -tf-input-arrays=a,b -tf-input-data-types=DT_FLOAT,DT_FLOAT -tf-input-shapes=':' -tf-output-arrays=StatefulIf,StatelessIf -o - | FileCheck %s +# RUN: tf-mlir-translate -graphdef-to-mlir %s -tf-input-arrays=a,b -tf-input-data-types=DT_FLOAT,DT_FLOAT -tf-input-shapes=':' -tf-output-arrays=StatefulIf,StatelessIf -o - -mlir-print-debuginfo | FileCheck %s # Verify that TensorFlow If and StatelessIf ops are mapped to the # composite If op in MLIR with is_stateless attribute set accordingly to # distinguish between them. -# CHECK-DAG: "tf.If"{{.*}} is_stateless = false, name = "StatefulIf" -# CHECK-DAG: "tf.If"{{.*}} is_stateless = true, name = "StatelessIf" +# CHECK-DAG: "tf.If"{{.*}} is_stateless = false{{.*}} loc("StatefulIf") +# CHECK-DAG: "tf.If"{{.*}} is_stateless = true{{.*}} loc("StatelessIf") node { name: "tf.Less" diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/functional-while-ops.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/functional-while-ops.pbtxt index 953f83a9f68..ede01ebf62b 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/functional-while-ops.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/functional-while-ops.pbtxt @@ -1,11 +1,11 @@ -# RUN: tf-mlir-translate -graphdef-to-mlir %s -tf-input-arrays=iter,val -tf-input-data-types=DT_INT32,DT_FLOAT -tf-input-shapes=':' -tf-output-arrays=StatefulWhile:1,StatelessWhile:1 -o - | FileCheck %s +# RUN: tf-mlir-translate -graphdef-to-mlir %s -tf-input-arrays=iter,val -tf-input-data-types=DT_INT32,DT_FLOAT -tf-input-shapes=':' -tf-output-arrays=StatefulWhile:1,StatelessWhile:1 -o - -mlir-print-debuginfo | FileCheck %s # Verify that TensorFlow While and StatelessWhile ops are mapped to the # composite While op in MLIR with is_stateless attribute set accordingly to # distinguish between them. 
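# (Editorial note: with -mlir-print-debuginfo added to the RUN line above, the
# importer carries each node's name as an MLIR location rather than a `name`
# attribute, so the updated checks below match a trailing location, e.g.
#   "tf.While"(...) {is_stateless = false, ...} ... loc("StatefulWhile")
# instead of a name = "StatefulWhile" attribute.)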
-# CHECK-DAG: "tf.While"{{.*}} is_stateless = false, name = "StatefulWhile" -# CHECK-DAG: "tf.While"{{.*}} is_stateless = true, name = "StatelessWhile" +# CHECK-DAG: "tf.While"{{.*}} is_stateless = false{{.*}} loc("StatefulWhile") +# CHECK-DAG: "tf.While"{{.*}} is_stateless = true{{.*}} loc("StatelessWhile") node { name: "StatefulWhile" diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/mlir_passthrough_op.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/mlir_passthrough_op.pbtxt index 1df903d46ce..da79023093c 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/mlir_passthrough_op.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/mlir_passthrough_op.pbtxt @@ -1,7 +1,7 @@ # RUN: tf-mlir-translate -graphdef-to-mlir %s | FileCheck %s # CHECK:"tf.MlirPassthroughOp" -# CHECK: mlir_module = "\0Afunc @main(%arg0 : tensor<10xf32>, %arg1 : tensor<10xf32>) -> tensor<10x10xf32> {\0A %add = \22tf.Add\22(%arg0, %arg1) : (tensor<10xf32>, tensor<10xf32>) -> tensor<10xf32>\0A %ret = \22magic.op\22(%add, %add) : (tensor<10xf32>, tensor<10xf32>) -> tensor<10x10xf32>\0A return %ret : tensor<10x10xf32>\0A}\0A", name = "MlirPassthroughOp"} : (tensor<10xf32>, tensor<10xf32>) -> tensor<*xf32> +# CHECK: mlir_module = "\0Afunc @main(%arg0 : tensor<10xf32>, %arg1 : tensor<10xf32>) -> tensor<10x10xf32> {\0A %add = \22tf.Add\22(%arg0, %arg1) : (tensor<10xf32>, tensor<10xf32>) -> tensor<10xf32>\0A %ret = \22magic.op\22(%add, %add) : (tensor<10xf32>, tensor<10xf32>) -> tensor<10x10xf32>\0A return %ret : tensor<10x10xf32>\0A}\0A"} : (tensor<10xf32>, tensor<10xf32>) -> tensor<*xf32> node { name: "x" diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/node-locations.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/node-locations.pbtxt index a8f58c427fd..fdf279f3887 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/node-locations.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/node-locations.pbtxt @@ -90,6 +90,6 @@ library { } # TODO(b/142400497): What is the semantic contract for locations? 
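# (Editorial note: a fused location stacks several source names into one loc.
# The updated check below appears to append the outermost node name,
# "fused_node_outside_function", after the per-function entries "n1@f1" and
# "n2@f2", so the full provenance chain survives the round trip.)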
-# CHECK: "tf.Const"{{.*}}value = dense<2>{{.*}}loc(fused["n1@f1", "n2@f2"]) +# CHECK: "tf.Const"{{.*}}value = dense<2>{{.*}}loc(fused["n1@f1", "n2@f2", "fused_node_outside_function"]) # CHECK: "tf.Const"{{.*}}value = dense<0>{{.*}}loc("node_outside_function") # CHECK: "tf.Const"{{.*}}value = dense<1>{{.*}}loc("node_inside_function@foo") diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/parse_example.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/parse_example.pbtxt new file mode 100644 index 00000000000..7411a5ea4d7 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/parse_example.pbtxt @@ -0,0 +1,225 @@ +# RUN: tf-mlir-translate -graphdef-to-mlir %s -tf-input-arrays=input0 -tf-input-data-types=DT_STRING -tf-input-shapes=32 -tf-output-arrays=ParseExample/ParseExampleV2:0,ParseExample/ParseExampleV2:7 -o - | FileCheck %s + +# CHECK: %[[parse_example:.*]]:8, %[[parse_example_control:.*]] = tf_executor.island wraps "tf.ParseExampleV2"(%arg0, +# CHECK: result_segment_sizes = dense<[2, 2, 2, 2, 0, 0]> : vector<6xi32> +# CHECK: tf_executor.fetch %[[parse_example]]#0, %[[parse_example]]#7 : tensor, tensor<32xf32> + +node { + name: "input0" + op: "Placeholder" + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "shape" + value { + shape { + unknown_rank: true + } + } + } +} +node { + name: "ParseExample/Const" + op: "Const" + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "ParseExample/Const_1" + op: "Const" + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "ParseExample/ParseExampleV2/names" + op: "Const" + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "ParseExample/ParseExampleV2/sparse_keys" + op: "Const" + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 2 + } + } + string_val: "feature_key3" + string_val: "feature_key4" + } + } + } +} +node { + name: "ParseExample/ParseExampleV2/dense_keys" + op: "Const" + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 2 + } + } + string_val: "feature_key1" + string_val: "feature_key2" + } + } + } +} +node { + name: "ParseExample/ParseExampleV2/ragged_keys" + op: "Const" + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "ParseExample/ParseExampleV2" + op: "ParseExampleV2" + input: "input0" + input: "ParseExample/ParseExampleV2/names" + input: "ParseExample/ParseExampleV2/sparse_keys" + input: "ParseExample/ParseExampleV2/dense_keys" + input: "ParseExample/ParseExampleV2/ragged_keys" + input: "ParseExample/Const" + input: "ParseExample/Const_1" + attr { + key: "Tdense" + value { + list { + type: DT_FLOAT + type: DT_FLOAT + } + } + } + attr { + key: "dense_shapes" + value { + list { + shape { + } + shape { + } + } + } + } + attr { + key: "num_sparse" + value { + i: 2 + } + } + attr { + key: "ragged_split_types" + value { + list 
{ + } + } + } + attr { + key: "ragged_value_types" + value { + list { + } + } + } + attr { + key: "sparse_types" + value { + list { + type: DT_STRING + type: DT_INT64 + } + } + } +} +versions { + producer: 175 +} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/quint8-const.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/quint8-const.pbtxt index 748bc996f36..cf8051f7aaa 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/quint8-const.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/quint8-const.pbtxt @@ -1,4 +1,4 @@ -# RUN: tf-mlir-translate -graphdef-to-mlir %s -o - | FileCheck %s +# RUN: tf-mlir-translate -graphdef-to-mlir %s -o - -mlir-print-debuginfo | FileCheck %s node { name: "Quantized_Constant" @@ -28,5 +28,5 @@ versions { } # CHECK: tf.Const -# CHECK-SAME: name = "Quantized_Constant" # CHECK-SAME: value = opaque<"tf", "{{0[xX][0-9a-fA-F]*}}"> : tensor +# CHECK-SAME: loc("Quantized_Constant") diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/switch_n.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/switch_n.pbtxt index 3dd5ce58ed2..e819efcddd1 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/switch_n.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/switch_n.pbtxt @@ -1,13 +1,13 @@ -# RUN: tf-mlir-translate -graphdef-to-splatted-mlir %s -o - | FileCheck %s --dump-input-on-failure +# RUN: tf-mlir-translate -graphdef-to-splatted-mlir %s -o - -mlir-print-debuginfo | FileCheck %s --dump-input-on-failure # CHECK: tf_executor.SwitchN # CHECK-SAME: of 3 : tensor # CHECK-SAME: T = i32 -# CHECK-SAME: name = "Case/branch_index/_3" +# CHECK-SAME: loc("Case/branch_index/_3") # CHECK: tf_executor.SwitchN # CHECK-SAME: of 2 : tensor # CHECK-SAME: T = f32 -# CHECK-SAME: name = "Case/Case/input_0/_7" +# CHECK-SAME: loc("Case/Case/input_0/_7") node { name: "Case/branch_index" diff --git a/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir b/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir index 3448c8c2005..c1c5f419ca9 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir @@ -182,7 +182,6 @@ func @rsqrt_grad_unranked(%arg0: tensor<*xf32>, %arg1: tensor<*xf32>) -> tensor< return %0 : tensor<*xf32> } - // CHECK-LABEL: SoftmaxCrossEntropyWithLogits // CHECK-SAME: %[[FEATURES:.*]]: tensor<2x3xf32>, %[[LABELS:.*]]: tensor<2x3xf32> func @SoftmaxCrossEntropyWithLogits(%features: tensor<2x3xf32>, %labels: tensor<2x3xf32>) -> (tensor<2xf32>, tensor<2x3xf32>) { @@ -222,6 +221,66 @@ func @scalar_SoftmaxCrossEntropyWithLogits(%features: tensor, %labels: tens return %0#0, %0#1 : tensor, tensor } +// CHECK-LABEL: SparseSoftmaxCrossEntropyWithLogits +// CHECK-SAME: %[[FEATURES:.*]]: tensor<2x3xf32>, %[[SPARSE_LABELS:.*]]: tensor<2xi32> +func @SparseSoftmaxCrossEntropyWithLogits(%features: tensor<2x3xf32>, %labels: tensor<2xi32>) -> (tensor<2xf32>, tensor<2x3xf32>) { + // Convert SPARSE_LABELS to dense LABELS. 
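+  // (Worked example, not part of the original test: with depth = 3,
+  // on_value = 1.0, off_value = 0.0, and axis = 1, sparse labels [1, 0]
+  // one-hot to
+  //   [[0, 1, 0],
+  //    [1, 0, 0]]
+  // which is the dense LABELS tensor consumed by the softmax expansion below.)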
+  // CHECK-DAG: %[[DEPTH:.*]] = "tf.Const"() {value = dense<3> : tensor} : () -> tensor
+  // CHECK-DAG: %[[ONE:.*]] = "tf.Const"() {value = dense<1.000000e+00> : tensor} : () -> tensor
+  // CHECK-DAG: %[[ZERO:.*]] = "tf.Const"() {value = dense<0.000000e+00> : tensor} : () -> tensor
+  // CHECK-DAG: %[[LABELS:.*]] = "tf.OneHot"(%[[SPARSE_LABELS]], %[[DEPTH]], %[[ONE]], %[[ZERO]]) {axis = 1 : i64} : (tensor<2xi32>, tensor, tensor, tensor) -> tensor<2x3xf32>
+
+  // Adjust labels to have NaN for out-of-range labels.
+  // CHECK-DAG: %[[ZERO_I32:.*]] = "tf.Const"() {value = dense<0> : tensor} : () -> tensor
+  // CHECK-DAG: %[[IS_NEGATIVE:.*]] = "tf.LessEqual"(%[[ZERO_I32]], %arg1) : (tensor, tensor<2xi32>) -> tensor<2xi1>
+  // CHECK-DAG: %[[IS_LESS:.*]] = "tf.Less"(%arg1, %[[DEPTH]]) : (tensor<2xi32>, tensor) -> tensor<2xi1>
+  // CHECK-DAG: %[[IS_WITHIN_RANGE:.*]] = "tf.LogicalAnd"(%[[IS_NEGATIVE]], %[[IS_LESS]]) : (tensor<2xi1>, tensor<2xi1>) -> tensor<2xi1>
+  // CHECK-DAG: %[[NAN:.*]] = "tf.Const"() {value = dense<0x7FC00000> : tensor} : () -> tensor
+  // CHECK-DAG: %[[ZERO_OR_NAN:.*]] = "tf.SelectV2"(%[[IS_WITHIN_RANGE]], %[[ZERO]], %[[NAN]]) : (tensor<2xi1>, tensor, tensor) -> tensor<2xf32>
+  // CHECK-DAG: %[[NEG_ONE:.*]] = "tf.Const"() {value = dense<-1> : tensor<1xi64>} : () -> tensor<1xi64>
+  // CHECK-DAG: %[[RESHAPE:.*]] = "tf.ExpandDims"(%[[ZERO_OR_NAN]], %[[NEG_ONE]]) : (tensor<2xf32>, tensor<1xi64>) -> tensor<2x1xf32>
+  // CHECK-DAG: %[[ADJUSTED_LABELS:.*]] = "tf.AddV2"(%[[LABELS]], %[[RESHAPE]]) : (tensor<2x3xf32>, tensor<2x1xf32>) -> tensor<2x3xf32>
+
+  // SoftmaxCrossEntropyWithLogits expansion
+  // CHECK-DAG: = "tf.Neg"({{.*}}) : (tensor<2x3xf32>) -> tensor<2x3xf32>
+  // CHECK-DAG: = "tf.LogSoftmax"({{.*}}) : (tensor<2x3xf32>) -> tensor<2x3xf32>
+  // CHECK-DAG: = "tf.Mul"({{.*}}) : (tensor<2x3xf32>, tensor<2x3xf32>) -> tensor<2x3xf32>
+  // CHECK-DAG: = "tf.Sum"({{.*}}) {keep_dims = false} : (tensor<2x3xf32>, tensor<1xi64>) -> tensor<2xf32>
+  // CHECK-DAG: = "tf.Softmax"({{.*}}) : (tensor<2x3xf32>) -> tensor<2x3xf32>
+  // CHECK-DAG: = "tf.Sub"({{.*}}) : (tensor<2x3xf32>, tensor<2x3xf32>) -> tensor<2x3xf32>
+
+  %0:2 = "tf.SparseSoftmaxCrossEntropyWithLogits"(%features, %labels) : (tensor<2x3xf32>, tensor<2xi32>) -> (tensor<2xf32>, tensor<2x3xf32>)
+  return %0#0, %0#1 : tensor<2xf32>, tensor<2x3xf32>
+}
+
+// CHECK-LABEL: SparseSoftmaxCrossEntropyWithLogits_with_bf16_i64
+func @SparseSoftmaxCrossEntropyWithLogits_with_bf16_i64(%features: tensor<2x3xbf16>, %labels: tensor<2xi64>) -> (tensor<2xbf16>, tensor<2x3xbf16>) {
+  // CHECK-NOT: tf.SparseSoftmaxCrossEntropyWithLogits
+  %0:2 = "tf.SparseSoftmaxCrossEntropyWithLogits"(%features, %labels) : (tensor<2x3xbf16>, tensor<2xi64>) -> (tensor<2xbf16>, tensor<2x3xbf16>)
+  return %0#0, %0#1 : tensor<2xbf16>, tensor<2x3xbf16>
+}
+
+// CHECK-LABEL: SparseSoftmaxCrossEntropyWithLogits_with_unranked_labels
+func @SparseSoftmaxCrossEntropyWithLogits_with_unranked_labels(%features: tensor<2x3xf32>, %labels: tensor) -> (tensor<2xf32>, tensor<2x3xf32>) {
+  // CHECK-NOT: tf.SparseSoftmaxCrossEntropyWithLogits
+  %0:2 = "tf.SparseSoftmaxCrossEntropyWithLogits"(%features, %labels) : (tensor<2x3xf32>, tensor) -> (tensor<2xf32>, tensor<2x3xf32>)
+  return %0#0, %0#1 : tensor<2xf32>, tensor<2x3xf32>
+}
+
+// CHECK-LABEL: SparseSoftmaxCrossEntropyWithLogits_with_dynamic_labels
+func @SparseSoftmaxCrossEntropyWithLogits_with_dynamic_labels(%features: tensor<2x3xf32>, %labels: tensor<*xi64>) -> (tensor<2xf32>, tensor<2x3xf32>) {
+  // CHECK-NOT:
tf.SparseSoftmaxCrossEntropyWithLogits + %0:2 = "tf.SparseSoftmaxCrossEntropyWithLogits"(%features, %labels) : (tensor<2x3xf32>, tensor<*xi64>) -> (tensor<2xf32>, tensor<2x3xf32>) + return %0#0, %0#1 : tensor<2xf32>, tensor<2x3xf32> +} + +// CHECK-LABEL: SparseSoftmaxCrossEntropyWithLogits_with_dynamic +func @SparseSoftmaxCrossEntropyWithLogits_with_dynamic(%features: tensor<*xbf16>, %labels: tensor<*xi64>) -> (tensor<2xbf16>, tensor<*xbf16>) { + // CHECK: tf.SparseSoftmaxCrossEntropyWithLogits + %0:2 = "tf.SparseSoftmaxCrossEntropyWithLogits"(%features, %labels) : (tensor<*xbf16>, tensor<*xi64>) -> (tensor<2xbf16>, tensor<*xbf16>) + return %0#0, %0#1 : tensor<2xbf16>, tensor<*xbf16> +} + // CHECK-LABEL: func @tanhgrad_float // CHECK-SAME: (%[[Y:.*]]: tensor<*xf32>, %[[DY:.*]]: tensor<*xf32>) func @tanhgrad_float(%y : tensor<*xf32>, %dy: tensor<*xf32>) -> tensor<*xf32> { @@ -276,3 +335,99 @@ func @addN_variant(%arg0: tensor>>, %arg1: tensor>>, tensor>>, tensor>>) -> tensor>> return %0 : tensor>> } + +// CHECK-LABEL: func @DynamicStitch_simple +func @DynamicStitch_simple(%arg0: tensor<2x2xf32>) -> tensor<2x2xf32> { + // CHECK-DAG: %[[SHAPE:.*]] = "tf.Const"() {value = dense<[-1, 2]> : tensor<2xi64>} : () -> tensor<2xi64> + // CHECK-DAG: %[[INP:.*]] = "tf.Reshape"(%arg0, %[[SHAPE]]) : (tensor<2x2xf32>, tensor<2xi64>) -> tensor<2x2xf32> + // CHECK-DAG: %[[ITEMS:.*]]:2 = "tf.Unpack"(%[[INP]]) {axis = 0 : i64} : (tensor<2x2xf32>) -> (tensor<2xf32>, tensor<2xf32>) + // CHECK-DAG: %[[AXIS:.*]] = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + // CHECK-DAG: %[[RESULT:.*]] = "tf.ConcatV2"(%[[ITEMS]]#1, %[[ITEMS]]#0, %[[AXIS]]) : (tensor<2xf32>, tensor<2xf32>, tensor) -> tensor<2x2xf32> + // CHECK: return %[[RESULT]] + + %indices = "tf.Const"() {value = dense<[1, 0]> : tensor<2xi32>} : () -> tensor<2xi32> + %0 = "tf.DynamicStitch"(%indices, %arg0) : (tensor<2xi32>, tensor<2x2xf32>) -> tensor<2x2xf32> + return %0 : tensor<2x2xf32> +} + +// CHECK-LABEL: DynamicStitch_scalar_matrix_indices +func @DynamicStitch_scalar_matrix_indices(%arg0: tensor<2xf32>, %arg1: tensor<2x2x2xf32>) -> (tensor<5x2xf32>) { + // CHECK-DAG: %[[SHAPE:.*]] = "tf.Const"() {value = dense<[-1, 2]> : tensor<2xi64>} : () -> tensor<2xi64> + // CHECK-DAG: %[[INP0:.*]] = "tf.Reshape"(%arg0, %[[SHAPE]]) : (tensor<2xf32>, tensor<2xi64>) -> tensor<1x2xf32> + // CHECK-DAG: %[[ITEMS0:.*]] = "tf.Unpack"(%[[INP0]]) {axis = 0 : i64} : (tensor<1x2xf32>) -> tensor<2xf32> + // CHECK-DAG: %[[INP1:.*]] = "tf.Reshape"(%arg1, %[[SHAPE]]) : (tensor<2x2x2xf32>, tensor<2xi64>) -> tensor<4x2xf32> + // CHECK-DAG: %[[ITEMS1:.*]]:4 = "tf.Unpack"(%[[INP1]]) {axis = 0 : i64} : (tensor<4x2xf32>) -> (tensor<2xf32>, tensor<2xf32>, tensor<2xf32>, tensor<2xf32>) + // CHECK-DAG: %[[AXIS:.*]] = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + // CHECK-DAG: %6 = "tf.ConcatV2"(%[[ITEMS1]]#3, %[[ITEMS1]]#2, %[[ITEMS1]]#1, %[[ITEMS1]]#0, %[[ITEMS0]], %[[AXIS]]) : (tensor<2xf32>, tensor<2xf32>, tensor<2xf32>, tensor<2xf32>, tensor<2xf32>, tensor) -> tensor<5x2xf32> + + %indices0 = "tf.Const"() {value = dense<4> : tensor} : () -> tensor + %indices1 = "tf.Const"() {value = dense<[[3, 2], [1, 0]]> : tensor<2x2xi32>} : () -> tensor<2x2xi32> + %0 = "tf.DynamicStitch"(%indices0, %indices1, %arg0, %arg1) : (tensor, tensor<2x2xi32>, tensor<2xf32>, tensor<2x2x2xf32>) -> tensor<5x2xf32> + return %0 : tensor<5x2xf32> +} + +// Verify that custom types are lowered and have legal output. 
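+// (Editorial recap of the expansion checked above: a DynamicStitch with
+// constant indices is rewritten to Reshape -> Unpack -> ConcatV2, with the
+// unpacked items re-concatenated in the order the indices dictate, e.g.
+// indices [1, 0] emit item 1 first and item 0 second. The test below only
+// asserts that the op disappears for the non-standard !tf.uint8 element
+// type.)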
+// CHECK-LABEL: func @DynamicStitch_uint8
+func @DynamicStitch_uint8(%arg0: tensor<2x2x!tf.uint8>) -> tensor<2x2x!tf.uint8> {
+  // CHECK-NOT: tf.DynamicStitch
+
+  %indices = "tf.Const"() {value = dense<[1, 0]> : tensor<2xi32>} : () -> tensor<2xi32>
+  %0 = "tf.DynamicStitch"(%indices, %arg0) : (tensor<2xi32>, tensor<2x2x!tf.uint8>) -> tensor<2x2x!tf.uint8>
+  return %0 : tensor<2x2x!tf.uint8>
+}
+
+// CHECK-LABEL: func @DynamicStitch_scalar_item
+func @DynamicStitch_scalar_item(%arg0: tensor<2xf32>) -> tensor<2xf32> {
+  // CHECK-DAG: %[[SHAPE:.*]] = "tf.Const"() {value = dense<-1> : tensor<1xi64>} : () -> tensor<1xi64>
+  // CHECK-DAG: %[[INP:.*]] = "tf.Reshape"(%arg0, %[[SHAPE]]) : (tensor<2xf32>, tensor<1xi64>) -> tensor<2xf32>
+  // CHECK-DAG: %[[ITEMS:.*]]:2 = "tf.Unpack"(%[[INP]]) {axis = 0 : i64} : (tensor<2xf32>) -> (tensor, tensor)
+  // CHECK-DAG: %[[AXIS:.*]] = "tf.Const"() {value = dense<0> : tensor} : () -> tensor
+  // CHECK-DAG: %[[RESULT:.*]] = "tf.ConcatV2"(%[[ITEMS]]#1, %[[ITEMS]]#0, %[[AXIS]]) : (tensor, tensor, tensor) -> tensor<2xf32>
+  // CHECK: return %[[RESULT]]
+
+  %indices = "tf.Const"() {value = dense<[1, 0]> : tensor<2xi32>} : () -> tensor<2xi32>
+  %0 = "tf.DynamicStitch"(%indices, %arg0) : (tensor<2xi32>, tensor<2xf32>) -> tensor<2xf32>
+  return %0 : tensor<2xf32>
+}
+
+// CHECK-LABEL: func @DynamicStitch_matrix_item
+func @DynamicStitch_matrix_item(%arg0: tensor<2x2x2xf32>) -> tensor<2x2x2xf32> {
+  // CHECK-DAG: %[[SHAPE:.*]] = "tf.Const"() {value = dense<[-1, 2, 2]> : tensor<3xi64>} : () -> tensor<3xi64>
+  // CHECK-DAG: %[[INP:.*]] = "tf.Reshape"(%arg0, %[[SHAPE]]) : (tensor<2x2x2xf32>, tensor<3xi64>) -> tensor<2x2x2xf32>
+  // CHECK-DAG: %[[ITEMS:.*]]:2 = "tf.Unpack"(%[[INP]]) {axis = 0 : i64} : (tensor<2x2x2xf32>) -> (tensor<2x2xf32>, tensor<2x2xf32>)
+  // CHECK-DAG: %[[AXIS:.*]] = "tf.Const"() {value = dense<0> : tensor} : () -> tensor
+  // CHECK-DAG: %[[RESULT:.*]] = "tf.ConcatV2"(%[[ITEMS]]#1, %[[ITEMS]]#0, %[[AXIS]]) : (tensor<2x2xf32>, tensor<2x2xf32>, tensor) -> tensor<2x2x2xf32>
+  // CHECK: return %[[RESULT]]
+
+  %indices = "tf.Const"() {value = dense<[1, 0]> : tensor<2xi32>} : () -> tensor<2xi32>
+  %0 = "tf.DynamicStitch"(%indices, %arg0) : (tensor<2xi32>, tensor<2x2x2xf32>) -> tensor<2x2x2xf32>
+  return %0 : tensor<2x2x2xf32>
+}
+
+// CHECK-LABEL: func @DynamicStitch_dynamic
+func @DynamicStitch_dynamic(%arg0: tensor<*xi32>, %arg1: tensor<*xf32>) -> tensor<*xf32> {
+  // CHECK: tf.DynamicStitch
+  %0 = "tf.DynamicStitch"(%arg0, %arg1) : (tensor<*xi32>, tensor<*xf32>) -> tensor<*xf32>
+  return %0 : tensor<*xf32>
+}
+
+// CHECK-LABEL: func @DynamicStitch_duplicates
+func @DynamicStitch_duplicates(%arg0: tensor<2x2xf32>) -> tensor<1x2xf32> {
+  // CHECK-DAG: %[[SHAPE:.*]] = "tf.Const"() {value = dense<[-1, 2]> : tensor<2xi64>} : () -> tensor<2xi64>
+  // CHECK-DAG: %[[INP:.*]] = "tf.Reshape"(%arg0, %[[SHAPE]]) : (tensor<2x2xf32>, tensor<2xi64>) -> tensor<2x2xf32>
+  // CHECK-DAG: %[[ITEMS:.*]]:2 = "tf.Unpack"(%[[INP]]) {axis = 0 : i64} : (tensor<2x2xf32>) -> (tensor<2xf32>, tensor<2xf32>)
+  // CHECK-DAG: %[[AXIS:.*]] = "tf.Const"() {value = dense<0> : tensor} : () -> tensor
+  // CHECK-DAG: %[[RESULT:.*]] = "tf.ConcatV2"(%[[ITEMS]]#1, %[[AXIS]]) : (tensor<2xf32>, tensor) -> tensor<1x2xf32>
+  // CHECK: return %[[RESULT]]
+
+  %indices = "tf.Const"() {value = dense<[0, 0]> : tensor<2xi32>} : () -> tensor<2xi32>
+  %0 = "tf.DynamicStitch"(%indices, %arg0) : (tensor<2xi32>, tensor<2x2xf32>) -> tensor<1x2xf32>
+  return %0 : tensor<1x2xf32>
+}
+
+// CHECK-LABEL: func @Reciprocal
+func @Reciprocal(%arg0:
tensor<*xf32>) -> tensor<*xf32> { + // CHECK: %[[ONE:.*]] = "tf.Const"() {value = dense<1.000000e+00> : tensor} : () -> tensor + // CHECK: "tf.Div"(%[[ONE]], %arg0) : (tensor, tensor<*xf32>) -> tensor<*xf32> + %0 = "tf.Reciprocal"(%arg0) : (tensor<*xf32>) -> tensor<*xf32> + return %0 : tensor<*xf32> +} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/BUILD b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/BUILD index 976ad56a895..cbdf5d96d0e 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/BUILD +++ b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/BUILD @@ -4,7 +4,7 @@ licenses(["notice"]) glob_lit_tests( data = [":test_utilities"], - driver = "@local_config_mlir//:run_lit.sh", + driver = "@llvm-project//mlir:run_lit.sh", test_file_exts = ["mlir"], ) @@ -14,7 +14,7 @@ filegroup( testonly = True, data = [ "//tensorflow/compiler/mlir:tf-mlir-translate", - "@llvm//:FileCheck", - "@llvm//:not", + "@llvm-project//llvm:FileCheck", + "@llvm-project//llvm:not", ], ) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/convert_tensor.mlir b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/convert_tensor.mlir index 52e4c529815..e6e22722aec 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/convert_tensor.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/convert_tensor.mlir @@ -1,8 +1,8 @@ // RUN: tf-mlir-translate -mlir-to-graphdef %s -o - | FileCheck %s func @main() -> (tensor<1x2xf16>, tensor<2xf16>) { - %0:2 = "_tf.Const"() {device = "", name = "foo", dtype = "tfdtype$DT_HALF", value = dense<1.0> : tensor<1x2xf16>} : () -> (tensor<1x2xf16>, !_tf.control) - %1:2 = "_tf.Const"() {device = "", name = "bar", dtype = "tfdtype$DT_HALF", value = dense<[1.0, 2.0]> : tensor<2xf16>} : () -> (tensor<2xf16>, !_tf.control) + %0:2 = "_tf.Const"() {device = "", dtype = "tfdtype$DT_HALF", value = dense<1.0> : tensor<1x2xf16>} : () -> (tensor<1x2xf16>, !_tf.control) loc("foo") + %1:2 = "_tf.Const"() {device = "", dtype = "tfdtype$DT_HALF", value = dense<[1.0, 2.0]> : tensor<2xf16>} : () -> (tensor<2xf16>, !_tf.control) loc("bar") return %0#0, %1#0 : tensor<1x2xf16>, tensor<2xf16> // CHECK: node { @@ -13,4 +13,4 @@ func @main() -> (tensor<1x2xf16>, tensor<2xf16>) { // CHECK-NEXT: op: "Const" // CHECK: half_val: 15360 // CHECK: half_val: 16384 -} \ No newline at end of file +} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/function-resource-args.mlir b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/function-resource-args.mlir index 24cb7b703c6..515e03ac2d2 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/function-resource-args.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/function-resource-args.mlir @@ -2,7 +2,7 @@ func @main() -> tensor<*x!tf.resource> attributes {tf.entry_function = {inputs = "", outputs = "func_call"}} { %0 = tf_executor.graph { - %outputs, %control = tf_executor.island wraps "tf.VarHandleOp"() {container = "a", device = "/CPU:0", dtype = i64, name = "x", shape = "tfshape$", shared_name = "x"} : () -> tensor>> + %outputs, %control = tf_executor.island wraps "tf.VarHandleOp"() {container = "a", device = "/CPU:0", dtype = i64, shape = "tfshape$", shared_name = "x"} : () -> tensor>> loc("x") %outputs_0, %control_1 = tf_executor.island wraps "tf.LegacyCall"(%outputs, %outputs) {_disable_call_shape_inference = true, f = @test_func_name0} : (tensor>>, tensor>>) -> tensor<*x!tf.resource> tf_executor.fetch %outputs_0 : 
tensor<*x!tf.resource> } diff --git a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/graph-as-function.mlir b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/graph-as-function.mlir index 40ddad90aec..cb9c5c380ba 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/graph-as-function.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/graph-as-function.mlir @@ -2,15 +2,15 @@ func @main(%arg0: tensor<*x!tf.resource>, %arg1: tensor<*x!tf.resource>>, %arg2: tensor<*xf32>, %arg3: tensor<2x4x6x8xi32>) -> (tensor, tensor) attributes {tf.entry_function = {inputs = "args_0,args_1,args_2,args_3", outputs = "rets_0_RetVal,rets_1_RetVal"}} { - %0:2 = "_tf.Const"() {device = "", dtype = "tfdtype$DT_FLOAT", name = "const", value = dense<0.000000e+00> : tensor} : () -> (tensor, !_tf.control) - %1:2 = "_tf.Identity"(%0#0) {T = "tfdtype$DT_FLOAT", device = "", name = "identity"} : (tensor) -> (tensor, !_tf.control) - %2:2 = "_tf.StatefulPartitionedCall"(%0#0, %arg1) {Tin = ["tfdtype$DT_FLOAT", "tfdtype$DT_RESOURCE"], Tout = ["tfdtype$DT_FLOAT"], _gradient_op_type = "PartitionedCall-1205", config = "", config_proto = "\0A\07\0A\03GPU\10\00\0A\07\0A\03CPU\10\012\02J\008\01", device = "", executor_type = "", f = @function0, name = "statefulpartitionedcall"} : (tensor, tensor<*x!tf.resource>>) -> (tensor, !_tf.control) - return %1#0, %2#0 : tensor, tensor + %0 = "tf.Const"() {device = "", dtype = "tfdtype$DT_FLOAT", value = dense<0.000000e+00> : tensor} : () -> tensor loc("const") + %1 = "tf.Identity"(%0) {T = "tfdtype$DT_FLOAT", device = ""} : (tensor) -> tensor loc("identity") + %2 = "tf.StatefulPartitionedCall"(%0, %arg1) {Tin = ["tfdtype$DT_FLOAT", "tfdtype$DT_RESOURCE"], Tout = ["tfdtype$DT_FLOAT"], _gradient_op_type = "PartitionedCall-1205", config = "", config_proto = "\0A\07\0A\03GPU\10\00\0A\07\0A\03CPU\10\012\02J\008\01", device = "", executor_type = "", f = @function0} : (tensor, tensor<*x!tf.resource>>) -> tensor loc("statefulpartitionedcall") + return %1, %2 : tensor, tensor } func @function0(%arg0: tensor<*xf32>, %arg1: tensor<*x!tf.resource>) -> tensor<*xf32> attributes {tf.signature.is_stateful} { - %0:2 = "_tf.Identity"(%arg0) {T = "tfdtype$DT_FLOAT", device = "", name = "Identity"} : (tensor<*xf32>) -> (tensor<*xf32>, !_tf.control) + %0 = "tf.Identity"(%arg0) {T = "tfdtype$DT_FLOAT", device = ""} : (tensor<*xf32>) -> tensor<*xf32> loc("Identity@function0") return %0#0 : tensor<*xf32> } diff --git a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/legalized_name.mlir b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/legalized_name.mlir index 67ccf52b62f..60b239aee14 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/legalized_name.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/legalized_name.mlir @@ -6,7 +6,7 @@ func @main() { %0 = "tf.Const"() {dtype = "tfdtype$DT_INT32", value = dense<0> : tensor} : () -> (tensor) loc("^foo") // CHECK: name: "fo.o" %1 = "tf.Const"() {dtype = "tfdtype$DT_INT32", value = dense<1> : tensor} : () -> (tensor) loc("fo{o") - // CHECK: name: "foo.1" + // CHECK: name: "foo" %2 = "tf.Const"() {dtype = "tfdtype$DT_INT32", value = dense<2> : tensor} : () -> (tensor) loc("foo@1") // CHECK: name: "ba.r" %3 = "tf.Const"() {dtype = "tfdtype$DT_INT32", value = dense<2> : tensor} : () -> (tensor) loc("ba r") diff --git a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/parse_example.mlir b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/parse_example.mlir new 
file mode 100644 index 00000000000..ec51fdc8e11 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/parse_example.mlir @@ -0,0 +1,86 @@ +// RUN: tf-mlir-translate -mlir-to-graphdef %s -o - | FileCheck %s + +module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, producer = 175 : i32}} { + func @main(%arg0: tensor<32x!tf.string>) -> (tensor) attributes {tf.entry_function = {inputs = "input0", outputs = "ParseExample/ParseExampleV2"}} { + + %0 = tf_executor.graph { + // NOTE(mrry): This dummy input was manually added because the exporter expects it and fails otherwise. + %dummy_input, %control_dummy = tf_executor.island wraps "tf.Placeholder.input"(%arg0) {device = "", dtype = "tfdtype$DT_STRING", shape = "tfshape$dim { size: 32 }"} : (tensor<32x!tf.string>) -> tensor<32x!tf.string> + + %outputs, %control = tf_executor.island wraps "tf.Const"() {device = "", dtype = f32, value = dense<[]> : tensor<0xf32>} : () -> tensor<0xf32> + %outputs_0, %control_1 = tf_executor.island wraps "tf.Const"() {device = "", dtype = f32, value = dense<[]> : tensor<0xf32>} : () -> tensor<0xf32> + %outputs_2, %control_3 = tf_executor.island wraps "tf.Const"() {device = "", dtype = !tf.string, value = opaque<"tf", "0x746674656E736F722464747970653A2044545F535452494E472074656E736F725F7368617065207B2064696D207B2073697A653A2032207D207D2074656E736F725F636F6E74656E743A20225C3031345C303134666561747572655F6B657931666561747572655F6B65793222"> : tensor<2x!tf.string>} : () -> tensor<2x!tf.string> + %outputs_4, %control_5 = tf_executor.island wraps "tf.Const"() {device = "", dtype = !tf.string, value = opaque<"tf", "0x746674656E736F722464747970653A2044545F535452494E472074656E736F725F7368617065207B2064696D207B207D207D"> : tensor<0x!tf.string>} : () -> tensor<0x!tf.string> + %outputs_6, %control_7 = tf_executor.island wraps "tf.Const"() {device = "", dtype = !tf.string, value = opaque<"tf", "0x746674656E736F722464747970653A2044545F535452494E472074656E736F725F7368617065207B2064696D207B207D207D"> : tensor<0x!tf.string>} : () -> tensor<0x!tf.string> + %outputs_8, %control_9 = tf_executor.island wraps "tf.Const"() {device = "", dtype = !tf.string, value = opaque<"tf", "0x746674656E736F722464747970653A2044545F535452494E472074656E736F725F7368617065207B2064696D207B2073697A653A2032207D207D2074656E736F725F636F6E74656E743A20225C3031345C303134666561747572655F6B657933666561747572655F6B65793422"> : tensor<2x!tf.string>} : () -> tensor<2x!tf.string> + + %outputs_10:8, %control_11 = tf_executor.island wraps "tf.ParseExampleV2"(%dummy_input, %outputs_4, %outputs_8, %outputs_2, %outputs_6, %outputs, %outputs_0) {Tdense = ["tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT"], dense_shapes = ["tfshape$", "tfshape$"], device = "", name = "ParseExample/ParseExampleV2", num_sparse = 2 : i64, ragged_split_types = [], ragged_value_types = [], result_segment_sizes = dense<[2, 2, 2, 2, 0, 0]> : vector<6xi32>, sparse_types = ["tfdtype$DT_STRING", "tfdtype$DT_INT64"]} : (tensor<32x!tf.string>, tensor<0x!tf.string>, tensor<2x!tf.string>, tensor<2x!tf.string>, tensor<0x!tf.string>, tensor<0xf32>, tensor<0xf32>) -> (tensor, tensor, tensor, tensor, tensor<2xi64>, tensor<2xi64>, tensor<32xf32>, tensor<32xf32>) + // CHECK: name: "ParseExample/ParseExampleV2" + // CHECK-NEXT: op: "ParseExampleV2" + // CHECK-NEXT: input: "input0" + // CHECK-NEXT: input: "_tf.Const3" + // CHECK-NEXT: input: "_tf.Const5" + // CHECK-NEXT: input: "_tf.Const2" + // CHECK-NEXT: input: "_tf.Const4" + // CHECK-NEXT: input: "_tf.Const" + // CHECK-NEXT: 
input: "_tf.Const1" + // CHECK-NEXT: attr { + // CHECK-NEXT: key: "Tdense" + // CHECK-NEXT: value { + // CHECK-NEXT: list { + // CHECK-NEXT: type: DT_FLOAT + // CHECK-NEXT: type: DT_FLOAT + // CHECK-NEXT: } + // CHECK-NEXT: } + // CHECK-NEXT: } + // CHECK-NEXT: attr { + // CHECK-NEXT: key: "dense_shapes" + // CHECK-NEXT: value { + // CHECK-NEXT: list { + // CHECK-NEXT: shape { + // CHECK-NEXT: } + // CHECK-NEXT: shape { + // CHECK-NEXT: } + // CHECK-NEXT: } + // CHECK-NEXT: } + // CHECK-NEXT: } + // CHECK-NEXT: attr { + // CHECK-NEXT: key: "num_sparse" + // CHECK-NEXT: value { + // CHECK-NEXT: i: 2 + // CHECK-NEXT: } + // CHECK-NEXT: } + // CHECK-NEXT: attr { + // CHECK-NEXT: key: "ragged_split_types" + // CHECK-NEXT: value { + // CHECK-NEXT: list { + // CHECK-NEXT: } + // CHECK-NEXT: } + // CHECK-NEXT: } + // CHECK-NEXT: attr { + // CHECK-NEXT: key: "ragged_value_types" + // CHECK-NEXT: value { + // CHECK-NEXT: list { + // CHECK-NEXT: } + // CHECK-NEXT: } + // CHECK-NEXT: } + // CHECK-NEXT: attr { + // CHECK-NEXT: key: "sparse_types" + // CHECK-NEXT: value { + // CHECK-NEXT: list { + // CHECK-NEXT: type: DT_STRING + // CHECK-NEXT: type: DT_INT64 + // CHECK-NEXT: } + // CHECK-NEXT: } + // CHECK-NEXT: } + + tf_executor.fetch %outputs_10#0 : tensor + } + return %0#0 : tensor + // CHECK: name: "main" + // CHECK-NEXT: op: "_Retval" + // CHECK-NEXT: input: "ParseExample/ParseExampleV2" + + } +} + diff --git a/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir b/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir index 2c3c72869b0..582f2237d01 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir @@ -17,8 +17,7 @@ module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, pr func @simple_chain(%arg0: tensor<1xf32>) -> tensor<*xf32> { // CHECK: %[[MUL:.*]] = "tf.Mul"{{.*}} (tensor<1xf32>, tensor<1xf32>) -> tensor<1xf32> // CHECK: %[[ADD:.*]] = "tf.Add"(%[[MUL]], %[[MUL]]) : (tensor<1xf32>, tensor<1xf32>) -> tensor<1xf32> -// CHECK: %[[CAST:.*]] = "tf.Cast"(%[[ADD]]) {{.*}} : (tensor<1xf32>) -> tensor<*xf32> -// CHECK: return %[[CAST]] : tensor<*xf32> +// CHECK: return %[[ADD]] : tensor<1xf32> %0 = "tf.Mul"(%arg0, %arg0) : (tensor<1xf32>, tensor<1xf32>) -> tensor<*xf32> %1 = "tf.Add"(%0, %0) : (tensor<*xf32>, tensor<*xf32>) -> tensor<*xf32> return %1 : tensor<*xf32> @@ -29,10 +28,12 @@ module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, pr // CHECK: %[[MUL:.*]] = "tf.Mul"{{.*}} (tensor<1xf32>, tensor<10xf32>) -> tensor<10xf32> // CHECK: %[[ADD:.*]] = "tf.Add"(%[[MUL]], %[[MUL]]) : (tensor<10xf32>, tensor<10xf32>) -> tensor<10xf32> // CHECK: %[[CAST:.*]] = "tf.Cast"(%[[ADD]]) {{.*}} : (tensor<10xf32>) -> tensor<*xf32> -// CHECK: return %[[CAST]] : tensor<*xf32> +// CHECK: %[[UNKNOWN:.*]] = "unknown.A"(%[[CAST]]) : (tensor<*xf32>) -> tensor<*xf32> +// CHECK: return %[[UNKNOWN]] : tensor<*xf32> %0 = "tf.Mul"(%arg0, %arg1) : (tensor<1xf32>, tensor<10xf32>) -> tensor<*xf32> %1 = "tf.Add"(%0, %0) : (tensor<*xf32>, tensor<*xf32>) -> tensor<*xf32> - return %1 : tensor<*xf32> + %2 = "unknown.A"(%1) : (tensor<*xf32>) -> tensor<*xf32> + return %2 : tensor<*xf32> } // CHECK-LABEL: func @unknown_op @@ -52,8 +53,7 @@ module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, pr // CHECK: %[[CST:.*]] = "tf.Const"{{.*}} {value = dense<1> : tensor<4xi32>} : () -> tensor<4xi32> // CHECK: %[[CONV:.*]] = "tf.Conv2DBackpropInput"(%[[CST]] // 
CHECK-SAME: (tensor<4xi32>, tensor<1x1x1x1xf32>, tensor<1x1x1x1xf32>) -> tensor<1x1x1x1xf32>
-// CHECK: %[[CAST:.*]] = "tf.Cast"(%[[CONV]]) {{.*}} : (tensor<1x1x1x1xf32>) -> tensor
-// CHECK: return %[[CAST]] : tensor
+// CHECK: return %[[CONV]] : tensor<1x1x1x1xf32>
   %0 = "tf.Shape"(%arg0) : (tensor<1x1x1x1xi32>) -> tensor<4xi32>
   %1 = "tf.Conv2DBackpropInput"(%0, %arg1, %arg1) {
     padding = "VALID", strides = [1, 1, 1, 1]
@@ -105,14 +105,16 @@ module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, pr
   }

   // CHECK-LABEL: func @shape_from_while_to_cond_body_functions
-  func @shape_from_while_to_cond_body_functions(%arg0: tensor<4xf32>) -> tensor<4xf32> {
-    %0 = "tf.While"(%arg0) {cond = @while_cond_func, body = @while_body_func, is_stateless = true} : (tensor<4xf32>) -> tensor<4xf32>
-    return %0 : tensor<4xf32>
+  func @shape_from_while_to_cond_body_functions(%arg0: tensor<4xf32>, %arg1: tensor>>, %arg2: tensor>>) -> tensor<4xf32> {
+    // CHECK: "tf.While"
+    // CHECK-SAME: (tensor<4xf32>, tensor>>, tensor>>) -> (tensor<4xf32>, tensor>>, tensor>>)
+    %0:3 = "tf.While"(%arg0, %arg1, %arg2) {cond = @while_cond_func, body = @while_body_func, is_stateless = true} : (tensor<4xf32>, tensor>>, tensor>>) -> (tensor<4xf32>, tensor<*x!tf.resource>, tensor>>)
+    return %0#0 : tensor<4xf32>
   }

   // CHECK-LABEL: func @while_cond_func
-  // CHECK-SAME: %arg0: tensor<4xf32>) -> tensor
-  func @while_cond_func(%arg0: tensor<*xf32>) -> tensor {
+  // CHECK-SAME: (%arg0: tensor<4xf32>, %arg1: tensor>>, %arg2: tensor>>) -> tensor
+  func @while_cond_func(%arg0: tensor<*xf32>, %arg1: tensor<*x!tf.resource>, %arg2: tensor>>) -> tensor {
     %0 = "tf.Const"() {value = dense<[1.000000e-04,2.000000e-04,3.000000e-04,4.000000e-04]> : tensor<4xf32>} : () -> tensor<4xf32>
     %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor
     // CHECK: tf.Equal
@@ -124,14 +126,27 @@ module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, pr
   }

   // CHECK-LABEL: func @while_body_func
-  func @while_body_func(%arg0: tensor<*xf32>) -> tensor<*xf32> {
+  func @while_body_func(%arg0: tensor<*xf32>, %arg1: tensor<*x!tf.resource>, %arg2: tensor>>) -> (tensor<*xf32>, tensor<*x!tf.resource>, tensor>>) {
     %0 = "tf.Const"() {value = dense<1.000000e-04> : tensor} : () -> tensor
     // CHECK: tf.AddV2
     // CHECK-SAME: (tensor<4xf32>, tensor) -> tensor<4xf32>
     %1 = "tf.AddV2"(%arg0, %0) : (tensor<*xf32>, tensor) -> tensor<*xf32>
+    // CHECK: "tf.Identity"
+    // CHECK-SAME: (tensor>>) -> tensor>>
+    %2 = "tf.Identity"(%arg1) : (tensor<*x!tf.resource>) -> tensor<*x!tf.resource>
+    // CHECK: "tf.TPUReplicatedInput"
+    // CHECK-SAME: (tensor>>) -> tensor>>
+    %ri = "tf.TPUReplicatedInput"(%2) : (tensor<*x!tf.resource>) -> tensor<*x!tf.resource>
+    // CHECK: "tf.ReadVariableOp"
+    // CHECK-SAME: (tensor>>) -> tensor<4xf32>
+    %read = "tf.ReadVariableOp"(%ri) : (tensor<*x!tf.resource>) -> tensor<*xf32>
+    // CHECK: "tf.ReadVariableOp"
+    // CHECK-SAME: (tensor>>) -> tensor<*xf32>
+    %read1 = "tf.ReadVariableOp"(%arg2) : (tensor>>) -> tensor<*xf32>
     // CHECK: return
     // CHECK-SAME: tensor<4xf32>
-    return %1 : tensor<*xf32>
+    // CHECK-SAME: tensor>>
+    return %1, %arg1, %arg2 : tensor<*xf32>, tensor<*x!tf.resource>, tensor>>
   }

   // CHECK-LABEL: func @invalid_function_reused_by_control_flows
@@ -162,4 +177,28 @@ module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, pr
     // CHECK-SAME: tensor<*xf32>
     return %0 : tensor<*xf32>
   }
+
+  // CHECK-LABEL: func @with_graph_and_islands
+  // CHECK-SAME: %[[ARG_0:.*]]: tensor>>
+  // CHECK-SAME: ->
tensor<4xf32> + func @with_graph_and_islands(%arg0: tensor>>) -> tensor<*xf32> { + %graph = tf_executor.graph { + %island:2 = tf_executor.island { + // CHECK: %[[ID_0:.*]] = "tf.IdentityN"(%[[ARG_0]]) + %id0 = "tf.IdentityN"(%arg0) + : (tensor>>) -> tensor>> + // CHECK-NEXT: %[[READ_0:.*]] = "tf.ReadVariableOp"(%[[ID_0]]) + // CHECK-SAME: (tensor>>) -> tensor<4xf32> + %read = "tf.ReadVariableOp"(%id0) : (tensor>>) -> tensor<*xf32> + // CHECK-NEXT: tf_executor.yield %[[READ_0]] : tensor<4xf32> + tf_executor.yield %read : tensor<*xf32> + } + // CHECK: tf_executor.fetch + // CHECK-SAME: tensor<4xf32> + tf_executor.fetch %island#0 : tensor<*xf32> + } + // CHECK: return + // CHECK-SAME: tensor<4xf32> + return %graph : tensor<*xf32> + } } diff --git a/tensorflow/compiler/mlir/tensorflow/tests/side-effect-analysis-test.mlir b/tensorflow/compiler/mlir/tensorflow/tests/side-effect-analysis-test.mlir index 9b17956f399..5ff3212db65 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/side-effect-analysis-test.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/side-effect-analysis-test.mlir @@ -737,3 +737,43 @@ func @while_cond( // expected-remark@above {{ID: 6}} // expected-remark@above {{Predecessors: {5}}} } + +// ----- + +// Tests that the pass tracks control dependencies based on TF op registry +// statefulness flag, for ops not yet defined in ODS. + +// CHECK-LABEL: func @tf_registry_ops +func @tf_registry_ops( + // expected-remark@above {{ID: 8}} + %arg0: tensor, %arg1: tensor) { + tf_executor.graph { + // expected-remark@above {{ID: 6}} + // expected-remark@above {{Successors: {7}}} + %island = tf_executor.island { + // expected-remark@above {{ID: 4}} + // expected-remark@above {{Successors: {5}}} + "tf.PrintV2"(%arg0) { output_stream = "stderr", end = "\n" } + // expected-remark@above {{ID: 0}} + // expected-remark@above {{Successors: {2}}} + : (tensor) -> () + %merge_summary = "tf.MergeSummary"(%arg0, %arg1) { N = 2 } + // expected-remark@above {{ID: 1}} + : (tensor, tensor) -> (tensor) + "tf.PrintV2"(%merge_summary) { output_stream = "stderr", end = "\n" } + // expected-remark@above {{ID: 2}} + // expected-remark@above {{Predecessors: {0}}} + // expected-remark@above {{Successors: {3}}} + : (tensor) -> () + tf_executor.yield + // expected-remark@above {{ID: 3}} + // expected-remark@above {{Predecessors: {2}}} + } + tf_executor.fetch %island : !tf_executor.control + // expected-remark@above {{ID: 5}} + // expected-remark@above {{Predecessors: {4}}} + } + return + // expected-remark@above {{ID: 7}} + // expected-remark@above {{Predecessors: {6}}} +} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir index 9db1ae27837..d58a0b86df5 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir @@ -898,6 +898,39 @@ func @testSoftmaxCrossEntropyWithLogits(%arg0: tensor<3xf32>, %arg1: tensor<3xf3 // ----- +// Test valid tf.SparseSoftmaxCrossEntropyWithLogits +// CHECK-LABEL: func @testSparseSoftmaxCrossEntropyWithLogits +func @testSparseSoftmaxCrossEntropyWithLogits(%arg0: tensor<2x3xf32>, %arg1: tensor<2xi32>) -> (tensor<3xf32>, tensor<2x3xf32>) { + %0:2 = "tf.SparseSoftmaxCrossEntropyWithLogits"(%arg0, %arg1) : (tensor<2x3xf32>, tensor<2xi32>) -> (tensor<3xf32>, tensor<2x3xf32>) + return %0#0, %0#1 : tensor<3xf32>, tensor<2x3xf32> +} + +// ----- + +func @testSparseSoftmaxCrossEntropyWithLogits(%arg0: tensor<3xf32>, %arg1: tensor<3xi32>) -> (tensor<3xf32>, 
tensor<2x3xf32>) { + // expected-error @+1 {{requires features operand of rank two}} + %0:2 = "tf.SparseSoftmaxCrossEntropyWithLogits"(%arg0, %arg1) : (tensor<3xf32>, tensor<3xi32>) -> (tensor<3xf32>, tensor<2x3xf32>) + return %0#0, %0#1 : tensor<3xf32>, tensor<2x3xf32> +} + +// ----- + +func @testSparseSoftmaxCrossEntropyWithLogits(%arg0: tensor<2x3xf32>, %arg1: tensor<2x3xi32>) -> (tensor<2xf32>, tensor<2x3xf32>) { + // expected-error @+1 {{requires labels operand of rank one}} + %0:2 = "tf.SparseSoftmaxCrossEntropyWithLogits"(%arg0, %arg1) : (tensor<2x3xf32>, tensor<2x3xi32>) -> (tensor<2xf32>, tensor<2x3xf32>) + return %0#0, %0#1 : tensor<2xf32>, tensor<2x3xf32> +} + +// ----- + +func @testSparseSoftmaxCrossEntropyWithLogits(%arg0: tensor<2x3xf32>, %arg1: tensor<3xi32>) -> (tensor<2xf32>, tensor<2x3xf32>) { + // expected-error @+1 {{requires features and labels with matching first dimension}} + %0:2 = "tf.SparseSoftmaxCrossEntropyWithLogits"(%arg0, %arg1) : (tensor<2x3xf32>, tensor<3xi32>) -> (tensor<2xf32>, tensor<2x3xf32>) + return %0#0, %0#1 : tensor<2xf32>, tensor<2x3xf32> +} + +// ----- + func @testWhileCond(tensor<*xf32>) -> (tensor) func @testWhileBody(tensor<*xf32>) -> (tensor<*xf32>) @@ -2009,3 +2042,246 @@ func @stridedSliceGrad(%dy: tensor<4x8xf32>, %begin: tensor<2xi64>, %end: tensor %0 = "tf.StridedSliceGrad"(%shape, %begin, %end, %strides, %dy) : (tensor<1x2xi64>, tensor<2xi64>, tensor<2xi64>, tensor<2xi64>, tensor<4x8xf32>) -> tensor return %0 : tensor } + +// ----- + +func @testDynamicStitch(%arg0: tensor<2x2xf32>) -> tensor<2x2xf32> { + %indices = "tf.Const"() {value = dense<[1, 0]> : tensor<2xi32>} : () -> tensor<2xi32> + %0 = "tf.DynamicStitch"(%indices, %arg0) : (tensor<2xi32>, tensor<2x2xf32>) -> tensor<2x2xf32> + return %0 : tensor<2x2xf32> +} + +// ----- + +func @testDynamicStitch() -> tensor<2x2xf32> { + // expected-error @+1 {{requires attribute N with value >= 1}} + %0 = "tf.DynamicStitch"() : () -> (tensor<2x2xf32>) + return %0 : tensor<2x2xf32> +} + +// ----- + +func @testDynamicStitch(%arg0: tensor<2x2xf32>) -> tensor { + %indices = "tf.Const"() {value = dense<[1, 0]> : tensor<2xi32>} : () -> tensor<2xi32> + // expected-error @+1 {{requires non scalar output}} + %0 = "tf.DynamicStitch"(%indices, %arg0) : (tensor<2xi32>, tensor<2x2xf32>) -> tensor + return %0 : tensor +} + +// ----- + +func @testDynamicStitch(%arg0: tensor<2x2xf32>) -> tensor<2x2xf32> { + %indices = "tf.Const"() {value = dense<[-1, 0]> : tensor<2xi32>} : () -> tensor<2xi32> + // expected-error @+1 {{requires non-negative index values; found -1}} + %0 = "tf.DynamicStitch"(%indices, %arg0) : (tensor<2xi32>, tensor<2x2xf32>) -> tensor<2x2xf32> + return %0 : tensor<2x2xf32> +} + +// ----- + +func @testDynamicStitch(%arg0: tensor<3x2xf32>) -> tensor<2x2xf32> { + %indices = "tf.Const"() {value = dense<[1, 0]> : tensor<2xi32>} : () -> tensor<2xi32> + // expected-error @+1 {{requires shape of data with type 'tensor<3x2xf32>' to have prefix matching with shape of the corresponding index type 'tensor<2xi32>'}} + %0 = "tf.DynamicStitch"(%indices, %arg0) : (tensor<2xi32>, tensor<3x2xf32>) -> tensor<2x2xf32> + return %0 : tensor<2x2xf32> +} + +// ----- + +func @testDynamicStitch(%arg0: tensor<2xf32>, %arg1: tensor<2x2x3xf32>) -> (tensor<5x2xf32>) { + %indices0 = "tf.Const"() {value = dense<4> : tensor} : () -> tensor + %indices1 = "tf.Const"() {value = dense<[[3, 2], [1, 0]]> : tensor<2x2xi32>} : () -> tensor<2x2xi32> + + // expected-error @+1 {{inconsistent shaped data and index pairs; inferred item 
shapes [2] and [3] don't match}} + %0 = "tf.DynamicStitch"(%indices0, %indices1, %arg0, %arg1) : (tensor, tensor<2x2xi32>, tensor<2xf32>, tensor<2x2x3xf32>) -> tensor<5x2xf32> + return %0 : tensor<5x2xf32> +} + +// ----- + +func @testDynamicStitch(%arg0: tensor<2x2xf32>) -> tensor<2x2xf32> { + %indices = "tf.Const"() {value = dense<[2, 0]> : tensor<2xi32>} : () -> tensor<2xi32> + // expected-error @+1 {{missing index 1}} + %0 = "tf.DynamicStitch"(%indices, %arg0) : (tensor<2xi32>, tensor<2x2xf32>) -> tensor<2x2xf32> + return %0 : tensor<2x2xf32> +} + +// ----- + +func @testDynamicStitch(%arg0: tensor<2x2xf32>) -> tensor<3x2xf32> { + %indices = "tf.Const"() {value = dense<[1, 0]> : tensor<2xi32>} : () -> tensor<2xi32> + // expected-error @+1 {{has invalid output type; should be compatible with inferred type 'tensor<2x2xf32>'}} + %0 = "tf.DynamicStitch"(%indices, %arg0) : (tensor<2xi32>, tensor<2x2xf32>) -> tensor<3x2xf32> + return %0 : tensor<3x2xf32> +} + +// ----- + +func @testDynamicStitch(%arg0: tensor, %arg1: tensor) -> (tensor<*xf32>) { + // expected-error @+1 {{requires shape of data with type 'tensor' to have prefix matching with shape of the corresponding index type 'tensor'}} + %0 = "tf.DynamicStitch"(%arg0, %arg1) : (tensor, tensor) -> tensor<*xf32> + return %0 : tensor<*xf32> +} + +// ----- + +func @testDynamicStitch(%arg0: tensor, %arg1: tensor<2x?xf32>) -> (tensor<2x3x2xf32>) { + %indices0 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %indices1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + + // expected-error @+1 {{has invalid output type; should be compatible with inferred type 'tensor<2x2x3xf32>'}} + %0 = "tf.DynamicStitch"(%indices0, %indices1, %arg0, %arg1) : (tensor, tensor, tensor, tensor<2x?xf32>) -> tensor<2x3x2xf32> + return %0 : tensor<2x3x2xf32> +} + +// ----- + +func @testConcatOffest(%concat_dim: tensor, %shape0: tensor<3xi32>) { + // expected-error @+1 {{'tf.ConcatOffset' op requires N to be at least 2, got 1}} + %0 = "tf.ConcatOffset"(%concat_dim, %shape0) : (tensor, tensor<3xi32>) -> tensor<3xi32> + return +} + +// ----- + +func @testConcatOffest(%concat_dim: tensor, %shape0: tensor<3xi32>, %shape1: tensor<3xi32>) { + // expected-error @+1 {{'tf.ConcatOffset' op requires sizes of shapes and offsets to be the same, got sizes 2 and 3}} + %0:3 = "tf.ConcatOffset"(%concat_dim, %shape0, %shape1) : (tensor, tensor<3xi32>, tensor<3xi32>) -> (tensor<3xi32>, tensor<3xi32>, tensor<3xi32>) + return +} + +// ----- + +func @testConcatOffest(%concat_dim: tensor<1xi32>, %shape0: tensor<3xi32>, %shape1: tensor<3xi32>) { + // expected-error @+1 {{'tf.ConcatOffset' op requires concat_dim to be a scalar, got tensor of rank 1}} + %0:2 = "tf.ConcatOffset"(%concat_dim, %shape0, %shape1) : (tensor<1xi32>, tensor<3xi32>, tensor<3xi32>) -> (tensor<3xi32>, tensor<3xi32>) + return +} + +// ----- + +func @testConcatOffest(%concat_dim: tensor, %shape0: tensor<3xi32>, %shape1: tensor<3xi32>) { + // expected-error @+1 {{'tf.ConcatOffset' op requires operand and result 1 to have compatible shapes}} + %0:2 = "tf.ConcatOffset"(%concat_dim, %shape0, %shape1) : (tensor, tensor<3xi32>, tensor<3xi32>) -> (tensor<3xi32>, tensor<8xi32>) + return +} + +// ----- + +func @testConcatOffest(%concat_dim: tensor, %shape0: tensor<3xi32>, %shape1: tensor<3x3xi32>) { + // expected-error @+1 {{'tf.ConcatOffset' op requires shape tensor operand 1 to be of rank 1, got tensor of rank 2}} + %0:2 = "tf.ConcatOffset"(%concat_dim, %shape0, %shape1) : (tensor, tensor<3xi32>, tensor<3x3xi32>) 
+  return
+}
+
+// -----
+
+func @testConcatOffset(%concat_dim: tensor<i32>, %shape0: tensor<3xi32>, %shape1: tensor<8xi32>) {
+  // expected-error @+1 {{'tf.ConcatOffset' op requires shape tensor (rank 1) operand 1 to be of length 3, got tensor (rank 1) of length 8}}
+  %0:2 = "tf.ConcatOffset"(%concat_dim, %shape0, %shape1) : (tensor<i32>, tensor<3xi32>, tensor<8xi32>) -> (tensor<3xi32>, tensor<8xi32>)
+  return
+}
+
+// -----
+
+func @tensor_scatter_update(%tensor: tensor<f32>, %indices: tensor<4x2xi32>, %updates: tensor<4x4xf32>) -> tensor<f32> {
+  // expected-error @+1 {{op requires tensor operand to have at least 1 dimension}}
+  %0 = "tf.TensorScatterUpdate"(%tensor, %indices, %updates) : (tensor<f32>, tensor<4x2xi32>, tensor<4x4xf32>) -> tensor<f32>
+  return %0 : tensor<f32>
+}
+
+// -----
+
+func @tensor_scatter_update(%tensor: tensor<4x4x4xf32>, %indices: tensor<i32>, %updates: tensor<4x4xf32>) -> tensor<4x4x4xf32> {
+  // expected-error @+1 {{op requires indices operand to have at least 1 dimension}}
+  %0 = "tf.TensorScatterUpdate"(%tensor, %indices, %updates) : (tensor<4x4x4xf32>, tensor<i32>, tensor<4x4xf32>) -> tensor<4x4x4xf32>
+  return %0 : tensor<4x4x4xf32>
+}
+
+// -----
+
+func @tensor_scatter_update(%tensor: tensor<4x4x4xf32>, %indices: tensor<4x2xi32>, %updates: tensor<f32>) -> tensor<4x4x4xf32> {
+  // expected-error @+1 {{op requires updates operand to have at least 1 dimension}}
+  %0 = "tf.TensorScatterUpdate"(%tensor, %indices, %updates) : (tensor<4x4x4xf32>, tensor<4x2xi32>, tensor<f32>) -> tensor<4x4x4xf32>
+  return %0 : tensor<4x4x4xf32>
+}
+
+// -----
+
+func @tensor_scatter_update(%tensor: tensor<4xf32>, %indices: tensor<4x2xi32>, %updates: tensor<4x4xf32>) -> tensor<4x4x4xf32> {
+  // expected-error @+1 {{op requires tensor operand with rank greater than or equal to the indices operand's last dimensions}}
+  %0 = "tf.TensorScatterUpdate"(%tensor, %indices, %updates) : (tensor<4xf32>, tensor<4x2xi32>, tensor<4x4xf32>) -> tensor<4x4x4xf32>
+  return %0 : tensor<4x4x4xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @testParseExampleV2DenseOnlyValid
+func @testParseExampleV2DenseOnlyValid(%serialized: tensor<32x!tf.string>, %names : tensor<32x!tf.string>, %dense_keys : tensor<2x!tf.string>, %dense_default_0 : tensor<f32>, %dense_default_1 : tensor<f32>) -> (tensor<32xf32>) {
+  %empty_str_vector = "tf.Const"() {dtype = !tf.string, value = opaque<"tf", "0x746674656E736F722464747970653A2044545F535452494E472074656E736F725F7368617065207B2064696D207B207D207D"> : tensor<0x!tf.string>} : () -> tensor<0x!tf.string>
+  %result:2 = "tf.ParseExampleV2"(%serialized, %names, %empty_str_vector, %dense_keys, %empty_str_vector, %dense_default_0, %dense_default_1) {dense_shapes = ["tfshape$", "tfshape$"], num_sparse = 0 : i64, result_segment_sizes = dense<[0, 0, 0, 2, 0, 0]> : vector<6xi32>} : (tensor<32x!tf.string>, tensor<32x!tf.string>, tensor<0x!tf.string>, tensor<2x!tf.string>, tensor<0x!tf.string>, tensor<f32>, tensor<f32>) -> (tensor<32xf32>, tensor<32xf32>)
+  return %result#0 : tensor<32xf32>
+}
+
+// -----
+
+func @testParseExampleV2DenseMismatchedInputOutput(%serialized: tensor<32x!tf.string>, %names : tensor<32x!tf.string>, %dense_keys : tensor<2x!tf.string>, %dense_default_0 : tensor, %dense_default_1 : tensor) -> (tensor<32xf32>) {
+  %empty_str_vector = "tf.Const"() {dtype = !tf.string, value = opaque<"tf", "0x746674656E736F722464747970653A2044545F535452494E472074656E736F725F7368617065207B2064696D207B207D207D"> : tensor<0x!tf.string>} : () -> tensor<0x!tf.string>
+  // expected-error @+1 {{output 'dense_values' should
have same length as attribute 'Tdense'}} + %result:3 = "tf.ParseExampleV2"(%serialized, %names, %empty_str_vector, %dense_keys, %empty_str_vector, %dense_default_0, %dense_default_1) {dense_shapes = ["tfshape$", "tfshape$"], num_sparse = 0 : i64, result_segment_sizes = dense<[0, 0, 0, 3, 0, 0]> : vector<6xi32>} : (tensor<32x!tf.string>, tensor<32x!tf.string>, tensor<0x!tf.string>, tensor<2x!tf.string>, tensor<0x!tf.string>, tensor, tensor) -> (tensor<32xf32>, tensor<32xf32>, tensor<32xi64>) + return %result#0 : tensor<32xf32> +} + +// ----- + +// CHECK-LABEL: func @testParseExampleV2SparseOnlyValid +func @testParseExampleV2SparseOnlyValid(%serialized: tensor<32x!tf.string>, %names : tensor<32x!tf.string>, %sparse_keys : tensor<2x!tf.string>) -> (tensor) { + %empty_str_vector = "tf.Const"() {dtype = !tf.string, value = opaque<"tf", "0x746674656E736F722464747970653A2044545F535452494E472074656E736F725F7368617065207B2064696D207B207D207D"> : tensor<0x!tf.string>} : () -> tensor<0x!tf.string> + %result:6 = "tf.ParseExampleV2"(%serialized, %names, %sparse_keys, %empty_str_vector, %empty_str_vector) {dense_shapes = [], num_sparse = 2 : i64, result_segment_sizes = dense<[2, 2, 2, 0, 0, 0]> : vector<6xi32>} : (tensor<32x!tf.string>, tensor<32x!tf.string>, tensor<2x!tf.string>, tensor<0x!tf.string>, tensor<0x!tf.string>) -> (tensor, tensor, tensor, tensor, tensor<2xi64>, tensor<2xi64>) + return %result#0 : tensor +} + +// ----- + +func @testParseExampleV2SparseInvalidNumSparse(%serialized: tensor<32x!tf.string>, %names : tensor<32x!tf.string>, %sparse_keys : tensor<2x!tf.string>) -> (tensor) { + %empty_str_vector = "tf.Const"() {dtype = !tf.string, value = opaque<"tf", "0x746674656E736F722464747970653A2044545F535452494E472074656E736F725F7368617065207B2064696D207B207D207D"> : tensor<0x!tf.string>} : () -> tensor<0x!tf.string> + // expected-error @+1 {{attribute 'num_sparse' should be the same as the length of attribute 'sparse_types'}} + %result:6 = "tf.ParseExampleV2"(%serialized, %names, %sparse_keys, %empty_str_vector, %empty_str_vector) {dense_shapes = [], num_sparse = 3 : i64, result_segment_sizes = dense<[2, 2, 2, 0, 0, 0]> : vector<6xi32>} : (tensor<32x!tf.string>, tensor<32x!tf.string>, tensor<2x!tf.string>, tensor<0x!tf.string>, tensor<0x!tf.string>) -> (tensor, tensor, tensor, tensor, tensor<2xi64>, tensor<2xi64>) + return %result#0 : tensor +} + +// ----- + +func @testParseExampleV2SparseInvalidSparseIndicesOutput(%serialized: tensor<32x!tf.string>, %names : tensor<32x!tf.string>, %sparse_keys : tensor<2x!tf.string>) -> (tensor) { + %empty_str_vector = "tf.Const"() {dtype = !tf.string, value = opaque<"tf", "0x746674656E736F722464747970653A2044545F535452494E472074656E736F725F7368617065207B2064696D207B207D207D"> : tensor<0x!tf.string>} : () -> tensor<0x!tf.string> + // expected-error @+1 {{output 'sparse_indices' should have same length as attribute 'sparse_types'}} + %result:5 = "tf.ParseExampleV2"(%serialized, %names, %sparse_keys, %empty_str_vector, %empty_str_vector) {dense_shapes = [], num_sparse = 2 : i64, result_segment_sizes = dense<[1, 2, 2, 0, 0, 0]> : vector<6xi32>} : (tensor<32x!tf.string>, tensor<32x!tf.string>, tensor<2x!tf.string>, tensor<0x!tf.string>, tensor<0x!tf.string>) -> (tensor, tensor, tensor, tensor<2xi64>, tensor<2xi64>) + return %result#0 : tensor +} + +// ----- + +func @testParseExampleV2SparseOnlyValid(%serialized: tensor<32x!tf.string>, %names : tensor<32x!tf.string>, %sparse_keys : tensor<2x!tf.string>) -> (tensor) { + %empty_str_vector = "tf.Const"() {dtype = 
!tf.string, value = opaque<"tf", "0x746674656E736F722464747970653A2044545F535452494E472074656E736F725F7368617065207B2064696D207B207D207D"> : tensor<0x!tf.string>} : () -> tensor<0x!tf.string> + // expected-error @+1 {{output 'sparse_shapes' should have same length as attribute 'sparse_types'}} + %result:5 = "tf.ParseExampleV2"(%serialized, %names, %sparse_keys, %empty_str_vector, %empty_str_vector) {dense_shapes = [], num_sparse = 2 : i64, result_segment_sizes = dense<[2, 2, 1, 0, 0, 0]> : vector<6xi32>} : (tensor<32x!tf.string>, tensor<32x!tf.string>, tensor<2x!tf.string>, tensor<0x!tf.string>, tensor<0x!tf.string>) -> (tensor, tensor, tensor, tensor, tensor<2xi64>) + return %result#0 : tensor +} + +// ----- + +// CHECK-LABEL: func @testParseExampleV2RaggedOnlyValid +func @testParseExampleV2RaggedOnlyValid(%serialized: tensor<32x!tf.string>, %names : tensor<32x!tf.string>, %ragged_keys : tensor<2x!tf.string>) -> (tensor) { + %empty_str_vector = "tf.Const"() {dtype = !tf.string, value = opaque<"tf", "0x746674656E736F722464747970653A2044545F535452494E472074656E736F725F7368617065207B2064696D207B207D207D"> : tensor<0x!tf.string>} : () -> tensor<0x!tf.string> + %result:4 = "tf.ParseExampleV2"(%serialized, %names, %empty_str_vector, %empty_str_vector, %ragged_keys) {dense_shapes = [], num_sparse = 0 : i64, result_segment_sizes = dense<[0, 0, 0, 0, 2, 2]> : vector<6xi32>} : (tensor<32x!tf.string>, tensor<32x!tf.string>, tensor<0x!tf.string>, tensor<0x!tf.string>, tensor<2x!tf.string>) -> (tensor, tensor, tensor, tensor) + return %result#0 : tensor +} + +// ----- + +func @testParseExampleV2RaggedMismatchedOutputLengths(%serialized: tensor<32x!tf.string>, %names : tensor<32x!tf.string>, %ragged_keys : tensor<2x!tf.string>) -> (tensor) { + %empty_str_vector = "tf.Const"() {dtype = !tf.string, value = opaque<"tf", "0x746674656E736F722464747970653A2044545F535452494E472074656E736F725F7368617065207B2064696D207B207D207D"> : tensor<0x!tf.string>} : () -> tensor<0x!tf.string> + // expected-error @+1 {{attribute 'ragged_value_types' should have same length as attribute 'ragged_split_types'}} + %result:3 = "tf.ParseExampleV2"(%serialized, %names, %empty_str_vector, %empty_str_vector, %ragged_keys) {dense_shapes = [], num_sparse = 0 : i64, result_segment_sizes = dense<[0, 0, 0, 0, 2, 1]> : vector<6xi32>} : (tensor<32x!tf.string>, tensor<32x!tf.string>, tensor<0x!tf.string>, tensor<0x!tf.string>, tensor<2x!tf.string>) -> (tensor, tensor, tensor) + return %result#0 : tensor +} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_device_ops_invalid.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tf_device_ops_invalid.mlir index 80fb5b98b67..8a546285f76 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf_device_ops_invalid.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_device_ops_invalid.mlir @@ -94,7 +94,7 @@ func @verifier_replicate_terminator() { // Check that a replicate with 'n' attribute that is less than 2 is invalid. 
func @verifier_replicate_n() {
  "tf_device.replicate" () ({
-// expected-error@-1 {{'tf_device.replicate' op attribute 'n' failed to satisfy constraint: 32-bit integer attribute whose minimal value is 2}}
+// expected-error@-1 {{'tf_device.replicate' op attribute 'n' failed to satisfy constraint: 32-bit integer attribute whose minimum value is 2}}
  ^entry:
    tf_device.return
  }) {n = 1 : i32} : () -> ()
diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/BUILD b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/BUILD
index 5ad0d96f79e..93ee05d478e 100644
--- a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/BUILD
+++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/BUILD
@@ -13,18 +13,30 @@ py_library(
     ],
 )
 
+py_library(
+    name = "common_v1",
+    srcs = ["common_v1.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow:tensorflow_py",
+    ],
+)
+
 filegroup(
     name = "test_utilities",
     testonly = True,
     data = [
-        "@llvm//:FileCheck",
+        "@llvm-project//llvm:FileCheck",
     ],
 )
 
 # Drop trailing ".py" from all test file names.
 all_test_basenames = [py[:-3] for py in glob(
     ["*.py"],
-    exclude = ["common.py"],
+    exclude = [
+        "common.py",
+        "common_v1.py",
+    ],
 )]
 
 # Instantiate all the tests.
diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/basic_v1.py b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/basic_v1.py
new file mode 100644
index 00000000000..8fb8b4e6e2d
--- /dev/null
+++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/basic_v1.py
@@ -0,0 +1,64 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+# RUN: %p/basic_v1 | FileCheck %s
+
+# pylint: disable=missing-docstring,line-too-long
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow.compat.v1 as tf
+from tensorflow.compiler.mlir.tensorflow.tests.tf_saved_model import common_v1
+
+# CHECK: "tf_saved_model.global_tensor"() {is_mutable, sym_name = "y", type = tensor<1x3xf32>, value = {{.*}} : tensor<1x3xf32>} : () -> ()
+# CHECK: func @basic([[ARG0:%.*]]: tensor<3x1xf32>,
+# CHECK-SAME: [[ARG1:%.*]]: tensor<!tf.resource<tensor<1x3xf32>>> {tf_saved_model.bound_input = @y}) -> tensor<3x3xf32>
+# CHECK-NEXT: [[R0:%.*]] = "tf.ReadVariableOp"([[ARG1]]) {{{.*}}} : (tensor<!tf.resource<tensor<1x3xf32>>>) -> tensor<1x3xf32>
+# CHECK-NEXT: [[R1:%.*]] = "tf.MatMul"([[ARG0]], [[R0]]) {{{.*}}} : (tensor<3x1xf32>, tensor<1x3xf32>) -> tensor<3x3xf32>
+# CHECK-NEXT: return [[R1]] : tensor<3x3xf32>
+
+
+def Test():
+
+  # Default TF1.x uses reference variables that are not supported by SavedModel
+  # v1 Importer. To use SavedModel V1 Importer, resource variables should be
+  # enabled.
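+  # (Reference variables carry their state on ref-typed edges with no resource
+  # handle, so there is nothing for the importer to bind as a
+  # tf_saved_model.global_tensor; resource variables provide that handle.)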
+  tf.compat.v1.enable_resource_variables()
+
+  tf.compat.v1.disable_eager_execution()
+
+  x = tf.constant([[1.0], [1.0], [1.0]])
+  y = tf.compat.v1.get_variable(
+      name='y',
+      shape=(1, 3),
+      initializer=tf.random_normal_initializer(),
+      trainable=True)
+  r = tf.matmul(x, y)
+
+  tensor_info_x = tf.compat.v1.saved_model.utils.build_tensor_info(x)
+  tensor_info_r = tf.compat.v1.saved_model.utils.build_tensor_info(r)
+
+  return {
+      'basic':
+          (tf.compat.v1.saved_model.signature_def_utils.build_signature_def(
+              inputs={'x': tensor_info_x},
+              outputs={'r': tensor_info_r},
+              method_name=tf.saved_model.PREDICT_METHOD_NAME))
+  }
+
+
+if __name__ == '__main__':
+  common_v1.do_test(Test())
diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/build_defs.bzl b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/build_defs.bzl
index e60d393bae8..0e83900d98c 100644
--- a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/build_defs.bzl
+++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/build_defs.bzl
@@ -11,6 +11,7 @@ def tf_saved_model_test(name, data):
         srcs = [name + ".py"],
         deps = [
             "//tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model:common",
+            "//tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model:common_v1",
         ],
     )
@@ -22,5 +23,5 @@ def tf_saved_model_test(name, data):
     lit_test(
         name = name + ".py",
         data = [name] + data,
-        driver = "@local_config_mlir//:run_lit.sh",
+        driver = "@llvm-project//mlir:run_lit.sh",
     )
diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/common.py b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/common.py
index 67725236f07..fd8221cd190 100644
--- a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/common.py
+++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/common.py
@@ -23,6 +23,7 @@ from __future__ import division
 from __future__ import print_function
 
 import tempfile
+
 from absl import app
 from absl import flags
 from absl import logging
diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/common_v1.py b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/common_v1.py
new file mode 100644
index 00000000000..35858d2b38a
--- /dev/null
+++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/common_v1.py
@@ -0,0 +1,93 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Serves as a common "main" function for all the SavedModel tests.
+
+There is a fair amount of setup needed to initialize tensorflow and get it
+into a proper TF1 session-based execution mode. This hides that boilerplate.
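+
+Concretely: the test hands us a signature_def_map, we write it out as a
+SavedModel v1 (under --save_model_path, or a temporary path), import it into
+MLIR with the experimental SavedModel V1 importer, and print the module for
+FileCheck to match.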
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tempfile +from absl import app +from absl import flags +from absl import logging +import tensorflow.compat.v1 as tf + +from tensorflow.python import pywrap_tensorflow + +# Use /tmp to make debugging the tests easier (see README.md) +flags.DEFINE_string('save_model_path', '', 'Path to save the model to.') +FLAGS = flags.FLAGS + + +# This function needs to take a "create_module_fn", as opposed to just the +# module itself, because the creation of the module has to be delayed until +# after absl and tensorflow have run various initialization steps. +def do_test(signature_def_map, show_debug_info=False): + """Runs test. + + 1. Performs absl and tf "main"-like initialization that must run before almost + anything else. + 2. Converts signature_def_map to SavedModel V1 + 3. Converts SavedModel V1 to MLIR + 4. Prints the textual MLIR to stdout (it is expected that the caller will have + FileCheck checks in its file to check this output). + + This is only for use by the MLIR SavedModel importer tests. + + Args: + signature_def_map: A map from string key to signature_def. The key will be + used as function name in the resulting MLIR. + show_debug_info: If true, shows debug locations in the resulting MLIR. + """ + + # Make LOG(ERROR) in C++ code show up on the console. + # All `Status` passed around in the C++ API seem to eventually go into + # `LOG(ERROR)`, so this makes them print out by default. + logging.set_stderrthreshold('error') + + def app_main(argv): + """Function passed to absl.app.run.""" + if len(argv) > 1: + raise app.UsageError('Too many command-line arguments.') + if FLAGS.save_model_path: + save_model_path = FLAGS.save_model_path + else: + save_model_path = tempfile.mktemp(suffix='.saved_model') + + sess = tf.Session() + sess.run(tf.initializers.global_variables()) + builder = tf.saved_model.builder.SavedModelBuilder(save_model_path) + builder.add_meta_graph_and_variables( + sess, [tf.saved_model.tag_constants.SERVING], + signature_def_map, + strip_default_attrs=True) + builder.save() + + logging.info('Saved model to: %s', save_model_path) + mlir = pywrap_tensorflow.experimental_convert_saved_model_v1_to_mlir( + save_model_path, ','.join([tf.saved_model.tag_constants.SERVING]), + show_debug_info) + # We don't strictly need this, but it serves as a handy sanity check + # for that API, which is otherwise a bit annoying to test. + # The canonicalization shouldn't affect these tests in any way. + mlir = pywrap_tensorflow.experimental_run_pass_pipeline( + mlir, 'tf-standard-pipeline', show_debug_info) + print(mlir) + + app.run(app_main) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/shared_variable_v1.py b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/shared_variable_v1.py new file mode 100644 index 00000000000..6ba51c2a325 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/shared_variable_v1.py @@ -0,0 +1,64 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+# RUN: %p/shared_variable_v1 | FileCheck %s
+
+# pylint: disable=missing-docstring,line-too-long
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow.compat.v1 as tf
+from tensorflow.compiler.mlir.tensorflow.tests.tf_saved_model import common_v1
+
+# CHECK: "tf_saved_model.global_tensor"() {is_mutable, sym_name = "y", type = tensor<1x3xf32>, value = {{.*}} : tensor<1x3xf32>} : () -> ()
+# CHECK: func {{@.*}}([[ARG0:%.*]]: tensor<3x1xf32>,
+# CHECK-SAME: [[ARG1:%.*]]: tensor<!tf.resource<tensor<1x3xf32>>> {tf_saved_model.bound_input = @y}) -> tensor<3x3xf32>
+
+# CHECK: func {{@.*}}([[ARG2:%.*]]: tensor<3x1xf32>,
+# CHECK-SAME: [[ARG3:%.*]]: tensor<!tf.resource<tensor<1x3xf32>>> {tf_saved_model.bound_input = @y}) -> tensor<3x3xf32>
+
+
+def Test():
+
+  # Default TF1.x uses reference variables that are not supported by SavedModel
+  # v1 Importer. To use SavedModel V1 Importer, resource variables should be
+  # enabled.
+  tf.enable_resource_variables()
+
+  tf.compat.v1.disable_eager_execution()
+
+  x = tf.constant([[1.0], [1.0], [1.0]])
+  y = tf.get_variable(
+      name='y',
+      shape=(1, 3),
+      initializer=tf.random_normal_initializer(),
+      trainable=True)
+  r = tf.matmul(x, y)
+
+  tensor_info_x = tf.saved_model.utils.build_tensor_info(x)
+  tensor_info_r = tf.saved_model.utils.build_tensor_info(r)
+
+  signature_def = tf.saved_model.signature_def_utils.build_signature_def(
+      inputs={'x': tensor_info_x},
+      outputs={'r': tensor_info_r},
+      method_name=tf.saved_model.PREDICT_METHOD_NAME)
+
+  # Create two signatures that share the same variable.
+  return {'basic': signature_def, 'basic_2': signature_def}
+
+
+if __name__ == '__main__':
+  common_v1.do_test(Test())
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc b/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc
index 7ef3449e3e9..75e7d2daeeb 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc
@@ -17,8 +17,8 @@ limitations under the License.
 
 #include <memory>
 
-#include "mlir/Pass/PassManager.h"  // TF:local_config_mlir
-#include "mlir/Transforms/Passes.h"  // TF:local_config_mlir
+#include "mlir/Pass/PassManager.h"  // TF:llvm-project
+#include "mlir/Transforms/Passes.h"  // TF:llvm-project
 #include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h"
 #include "tensorflow/compiler/mlir/tensorflow/utils/bridge_logger.h"
 #include "tensorflow/compiler/mlir/tensorflow/utils/error_util.h"
@@ -27,6 +27,9 @@ namespace mlir {
 namespace TFTPU {
 
 void CreateTPUBridge(OpPassManager &pm) {
+  // Run shape inference so that tf_executor/tf_device ops created later will
+  // likely inherit more concrete types.
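+  // (Running it this early refines unranked and partially-ranked result types
+  // before any of the clustering decisions below are made.)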
+  pm.addPass(TF::CreateTFShapeInferencePass());
   OpPassManager &func_pm = pm.nest<FuncOp>();
   func_pm.addPass(tf_executor::CreateTFExecutorIslandCoarseningPass());
   func_pm.addPass(CreateTPUClusterFormationPass());
@@ -35,8 +38,13 @@ void CreateTPUBridge(OpPassManager &pm) {
   // because DecomposeResourceOpsPass uses pattern rewriter which hoists
   // changed constants out of tf_device.Launch.
   func_pm.addPass(TFDevice::CreateDecomposeResourceOpsPass());
-  func_pm.addPass(tf_executor::CreateTFExecutorConstantSinkingPass());
-  func_pm.addPass(TFDevice::CreateResourceOpLiftingPass());
+
+  // Run another shape inference pass because resource decomposition might have
+  // created new partial types.
+  pm.addPass(TF::CreateTFShapeInferencePass());
+  OpPassManager &func_pm2 = pm.nest<FuncOp>();
+  func_pm2.addPass(tf_executor::CreateTFExecutorConstantSinkingPass());
+  func_pm2.addPass(TFDevice::CreateResourceOpLiftingPass());
 
   pm.addPass(TF::CreateResourceDeviceInferencePass());
   pm.addPass(TFDevice::CreateClusterOutliningPass());
@@ -56,7 +64,7 @@ tensorflow::Status TPUBridge(ModuleOp module, bool enable_logging) {
 
   // Add logger to bridge passmanager.
   if (enable_logging)
-    bridge.addInstrumentation(std::make_unique<tensorflow::BridgeLogger>());
+    bridge.enableIRPrinting(std::make_unique<tensorflow::BridgeLoggerConfig>());
 
   // Populate a passmanager with the list of passes that implement the bridge.
   CreateTPUBridge(bridge);
@@ -80,7 +88,7 @@ tensorflow::Status RunBridgeWithStandardPipeline(ModuleOp module,
 
   // Add logger to bridge passmanager.
   if (enable_logging)
-    bridge.addInstrumentation(std::make_unique<tensorflow::BridgeLogger>());
+    bridge.enableIRPrinting(std::make_unique<tensorflow::BridgeLoggerConfig>());
 
   StandardPipelineOptions pipeline_options;
   pipeline_options.enable_inliner.setValue(enable_inliner);
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/bridge.h b/tensorflow/compiler/mlir/tensorflow/transforms/bridge.h
index ff446af24f5..34543069f5b 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/bridge.h
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/bridge.h
@@ -16,7 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSFORMS_BRIDGE_H_
 #define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSFORMS_BRIDGE_H_
 
-#include "mlir/IR/Module.h"  // TF:local_config_mlir
+#include "mlir/IR/Module.h"  // TF:llvm-project
 #include "tensorflow/core/lib/core/status.h"
 
 namespace mlir {
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/bridge_pass.cc b/tensorflow/compiler/mlir/tensorflow/transforms/bridge_pass.cc
index 0208dc2f579..3af20758207 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/bridge_pass.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/bridge_pass.cc
@@ -13,9 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License.
==============================================================================*/ -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassManager.h" // TF:local_config_mlir -#include "mlir/Transforms/Passes.h" // TF:local_config_mlir +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassManager.h" // TF:llvm-project +#include "mlir/Transforms/Passes.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/transforms/bridge.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" #include "tensorflow/compiler/mlir/tensorflow/utils/error_util.h" diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/canonicalize.td b/tensorflow/compiler/mlir/tensorflow/transforms/canonicalize.td index 7c38b78f239..bfe58397f22 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/canonicalize.td +++ b/tensorflow/compiler/mlir/tensorflow/transforms/canonicalize.td @@ -23,7 +23,7 @@ def SingleResultAndOperandHaveSameElementType : Constraint< CPred<"getElementTypeOrSelf($0) == getElementTypeOrSelf($1)">>; def SingleResultAndOperandHaveSameType : Constraint< - CPred<"$0->getType() == $1->getType()">>; + CPred<"$0.getType() == $1.getType()">>; def IsRank2Tensor : Type, "Rank 2 tensor">; diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/cluster_formation.cc b/tensorflow/compiler/mlir/tensorflow/transforms/cluster_formation.cc index 165d1b2388b..feeddf4696e 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/cluster_formation.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/cluster_formation.cc @@ -20,13 +20,13 @@ limitations under the License. #include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Block.h" // TF:local_config_mlir -#include "mlir/IR/BlockAndValueMapping.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassRegistry.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Block.h" // TF:llvm-project +#include "mlir/IR/BlockAndValueMapping.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassRegistry.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_device.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" @@ -68,11 +68,11 @@ StringRef GetDevice(Operation* op) { // re-ordered but forming clusters of non-continuous ops is effectively // re-ordering them.. bool CanMergeIntoCluster(const Cluster& c, Operation* to_merge) { - return llvm::all_of(to_merge->getOperands(), [&](Value* operand) { + return llvm::all_of(to_merge->getOperands(), [&](Value operand) { // Block arguments. - if (isa(operand)) return true; + if (operand.isa()) return true; - Operation* defining_op = operand->getDefiningOp(); + Operation* defining_op = operand.getDefiningOp(); // Operand produced by other islands. 
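      // (That is, its defining op lives outside the block containing this
      // cluster, so merging cannot reorder anything within that block.)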
if (defining_op->getBlock() != c.ops.front()->getBlock()) return true; @@ -95,12 +95,12 @@ bool CanMergeIntoCluster(const Cluster& c, Operation* to_merge) { }); } -void ReplaceLiveOutExternalUses(llvm::ArrayRef live_outs, +void ReplaceLiveOutExternalUses(llvm::ArrayRef live_outs, tf_device::LaunchOp launch_op) { Region* launch_op_region = &launch_op.body(); for (const auto& p : llvm::zip(live_outs, launch_op.getResults())) { - Value* from = std::get<0>(p); - for (auto& use : from->getUses()) { + Value from = std::get<0>(p); + for (auto& use : from.getUses()) { if (launch_op_region->isAncestor(use.getOwner()->getParentRegion())) continue; use.set(std::get<1>(p)); @@ -109,14 +109,14 @@ void ReplaceLiveOutExternalUses(llvm::ArrayRef live_outs, } // Get all escaped live-out values of a region. -void GetLiveOuts(Region* region, llvm::SmallVectorImpl* live_outs) { +void GetLiveOuts(Region* region, llvm::SmallVectorImpl* live_outs) { live_outs->clear(); for (Operation& op : region->front()) { - for (Value* v : op.getResults()) { + for (Value v : op.getResults()) { // A value is live-out if any of its users are not inside value producer's // region. - bool is_live_out = llvm::any_of(v->getUsers(), [&](Operation* user) { + bool is_live_out = llvm::any_of(v.getUsers(), [&](Operation* user) { return !region->isAncestor(user->getParentRegion()); }); @@ -145,7 +145,7 @@ void BuildLaunchForCluster(const Cluster& c, OpBuilder* builder) { // Get all escaped live-out values of region, they are used later to determine // return values and types of launch op. - llvm::SmallVector live_outs; + llvm::SmallVector live_outs; GetLiveOuts(®ion, &live_outs); // Build a `tf_device.return` op at end of region, with all live-out values @@ -157,8 +157,8 @@ void BuildLaunchForCluster(const Cluster& c, OpBuilder* builder) { llvm::SmallVector live_out_types; live_out_types.reserve(live_outs.size()); - for (Value* v : live_outs) { - live_out_types.emplace_back(v->getType()); + for (Value v : live_outs) { + live_out_types.emplace_back(v.getType()); } tf_device::LaunchOp launch_op = builder->create( diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/cluster_outlining.cc b/tensorflow/compiler/mlir/tensorflow/transforms/cluster_outlining.cc index 10337df1a66..1f082bd1137 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/cluster_outlining.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/cluster_outlining.cc @@ -17,15 +17,15 @@ limitations under the License. // `tf_device.launch` with equivalent `tf_device.launch_func` operations. 
#include "llvm/ADT/SmallVector.h" -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Block.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassRegistry.h" // TF:local_config_mlir -#include "mlir/Transforms/RegionUtils.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Block.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassRegistry.h" // TF:llvm-project +#include "mlir/Transforms/RegionUtils.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_device.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" @@ -51,12 +51,12 @@ void ReplaceLaunchReturnWithReturn(tf_device::ReturnOp launch_return_op, // Builds a function that outlines region attached to launch_op and inserts // built function into given module. -FuncOp BuildFunction(StringRef device, llvm::ArrayRef live_ins, +FuncOp BuildFunction(StringRef device, llvm::ArrayRef live_ins, tf_device::LaunchOp launch_op, SymbolTable* symbol_table, OpBuilder* builder) { llvm::SmallVector operand_types; operand_types.reserve(live_ins.size()); - for (Value* v : live_ins) operand_types.emplace_back(v->getType()); + for (Value v : live_ins) operand_types.emplace_back(v.getType()); llvm::SmallVector result_types(launch_op.getResultTypes()); @@ -101,7 +101,7 @@ FuncOp BuildFunction(StringRef device, llvm::ArrayRef live_ins, // removed afterwards.` void OutlineLaunch(tf_device::LaunchOp launch_op, SymbolTable* symbol_table, OpBuilder* builder) { - llvm::SetVector live_ins; + llvm::SetVector live_ins; getUsedValuesDefinedAbove(launch_op.body(), launch_op.body(), live_ins); StringRef device = diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/constant_fold.cc b/tensorflow/compiler/mlir/tensorflow/transforms/constant_fold.cc index 0ef0072390d..11eafdede08 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/constant_fold.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/constant_fold.cc @@ -20,6 +20,7 @@ limitations under the License. #include "tensorflow/c/eager/c_api.h" #include "tensorflow/c/tf_status.h" #include "tensorflow/compiler/mlir/tensorflow/utils/eval_util.h" +#include "tensorflow/core/platform/mutex.h" namespace mlir { namespace TF { @@ -59,6 +60,10 @@ LogicalResult ConstantFoldFallbackHook( inputs.push_back(input.cast()); } + // Avoid overlapping folds with the same context. + // TODO(jpienaar): Avoid using global context & mutex here. 
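+  // (The function-local static below is intentionally never deleted; leaking
+  // it sidesteps destruction-order problems for folds that may still run
+  // during process teardown.)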
+ static auto* mu = new tensorflow::mutex(); + tensorflow::mutex_lock l(*mu); return tensorflow::EvaluateOperation(inst, inputs, ctx, &results); } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/constant_fold.h b/tensorflow/compiler/mlir/tensorflow/transforms/constant_fold.h index ad52ac66538..3718d4bd765 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/constant_fold.h +++ b/tensorflow/compiler/mlir/tensorflow/transforms/constant_fold.h @@ -18,9 +18,9 @@ limitations under the License. #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project namespace mlir { namespace TF { diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/decode_constant.cc b/tensorflow/compiler/mlir/tensorflow/transforms/decode_constant.cc index f17a5cd8808..51c37b038d3 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/decode_constant.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/decode_constant.cc @@ -15,10 +15,10 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/transforms/decode_constant.h" -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.h" diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/decode_constant.h b/tensorflow/compiler/mlir/tensorflow/transforms/decode_constant.h index 566d956ac85..ae8b4eace4d 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/decode_constant.h +++ b/tensorflow/compiler/mlir/tensorflow/transforms/decode_constant.h @@ -16,7 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSFORMS_DECODE_CONSTANT_H_ #define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSFORMS_DECODE_CONSTANT_H_ -#include "mlir/Pass/Pass.h" // TF:local_config_mlir +#include "mlir/Pass/Pass.h" // TF:llvm-project namespace mlir { namespace TF { diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops.h b/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops.h index 3a816233fdf..6697a2181ad 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops.h +++ b/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops.h @@ -16,8 +16,8 @@ limitations under the License. 
#ifndef TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSFORMS_DECOMPOSE_RESOURCE_OPS_H_ #define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSFORMS_DECOMPOSE_RESOURCE_OPS_H_ -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/PatternMatch.h" // TF:llvm-project namespace mlir { namespace TF { diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops.td b/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops.td index 3c98f30de7b..db82a71bf80 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops.td @@ -25,7 +25,7 @@ def CreateTFReadVariableOp: NativeCodeCall< "$_builder.create(" " $0.getLoc()," " UnrankedTensorType::get(" - " $1->getType().cast().getElementType())," + " $1.getType().cast().getElementType())," " $2)" >; diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops_pass.cc b/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops_pass.cc index 61fc12d6ab9..8d83b5c2fa2 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops_pass.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops_pass.cc @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir +#include "mlir/IR/PatternMatch.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops.h" namespace mlir { diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/delete_unused_funcs.cc b/tensorflow/compiler/mlir/tensorflow/transforms/delete_unused_funcs.cc index 50215b7163a..3b13633ed80 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/delete_unused_funcs.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/delete_unused_funcs.cc @@ -18,8 +18,8 @@ limitations under the License. #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.h" namespace mlir { diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/dialect_hooks.cc b/tensorflow/compiler/mlir/tensorflow/transforms/dialect_hooks.cc index af6476615bb..05b0fb20b62 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/dialect_hooks.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/dialect_hooks.cc @@ -16,13 +16,13 @@ limitations under the License. 
#include #include "llvm/ADT/ArrayRef.h" -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Dialect.h" // TF:local_config_mlir -#include "mlir/IR/DialectHooks.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Dialect.h" // TF:llvm-project +#include "mlir/IR/DialectHooks.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/constant_fold.h" #include "tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.h" diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/executor_island_coarsening.cc b/tensorflow/compiler/mlir/tensorflow/transforms/executor_island_coarsening.cc index 918e6ac3078..837944ce0e7 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/executor_island_coarsening.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/executor_island_coarsening.cc @@ -27,12 +27,12 @@ limitations under the License. #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Casting.h" -#include "mlir/IR/Block.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassRegistry.h" // TF:local_config_mlir +#include "mlir/IR/Block.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassRegistry.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" #include "tensorflow/core/platform/logging.h" @@ -49,11 +49,11 @@ enum IslandType { kParentIsland, kChildIsland }; // IslandResult is a helper struct holding an islands result and associated // inner op result. struct IslandResult { - IslandResult(Value* inner_op_result, Value* island_result) + IslandResult(Value inner_op_result, Value island_result) : inner_op_result(inner_op_result), island_result(island_result) {} - Value* inner_op_result; - Value* island_result; + Value inner_op_result; + Value island_result; }; struct ExecutorIslandCoarsening @@ -70,16 +70,16 @@ llvm::Optional GetOperandCandidateToMergeWith(IslandOp island) { Operation* candidate = nullptr; // Check island control operands. - for (Value* input : island.controlInputs()) { - Operation* def = input->getDefiningOp(); + for (Value input : island.controlInputs()) { + Operation* def = input.getDefiningOp(); DCHECK_EQ(def->getParentOp(), graph_op); if (!candidate || candidate->isBeforeInBlock(def)) candidate = def; } // Check island data operands. 
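  // Walk all ops nested in the island, since a value consumed deep inside the
  // island body may be produced by another island of the same graph.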
island.walk([graph_op, &candidate](Operation* op) { - for (Value* input : op->getOperands()) { - Operation* def = input->getDefiningOp(); + for (Value input : op->getOperands()) { + Operation* def = input.getDefiningOp(); if (!def || def->getParentOp() != graph_op) continue; if (!candidate || candidate->isBeforeInBlock(def)) candidate = def; } @@ -99,15 +99,15 @@ llvm::Optional GetResultCandidateToMergeWith(IslandOp island) { Operation* candidate = nullptr; // Check island control results. - for (Operation* user : island.control()->getUsers()) { + for (Operation* user : island.control().getUsers()) { DCHECK_EQ(user->getParentOp(), graph_op); if (!candidate || user->isBeforeInBlock(candidate)) candidate = user; } // Check island data results. Block& graph_body = llvm::cast(graph_op).GetBody(); - for (Value* result : island.outputs()) { - for (Operation* user : result->getUsers()) { + for (Value result : island.outputs()) { + for (Operation* user : result.getUsers()) { Operation* def = graph_body.findAncestorOpInBlock(*user); DCHECK_NE(def, nullptr); if (!candidate || def->isBeforeInBlock(candidate)) candidate = def; @@ -121,9 +121,9 @@ llvm::Optional GetResultCandidateToMergeWith(IslandOp island) { // Collects the operands for the new island by collecting all control inputs of // the islands being merged. -llvm::SmallSetVector GetNewIslandOperands(IslandOp parent, - IslandOp child) { - llvm::SmallSetVector operands; +llvm::SmallSetVector GetNewIslandOperands(IslandOp parent, + IslandOp child) { + llvm::SmallSetVector operands; operands.insert(parent.getOperands().begin(), parent.getOperands().end()); operands.insert(child.getOperands().begin(), child.getOperands().end()); operands.remove(parent.control()); @@ -145,9 +145,9 @@ llvm::SmallVector GetNewIslandResultsAndForwardResults( for (auto ret_vals : llvm::zip(parent.GetYield().getOperands(), parent.outputs())) { bool result_captured = false; - Value* inner_op_result = std::get<0>(ret_vals); - Value* island_result = std::get<1>(ret_vals); - for (auto& use : llvm::make_early_inc_range(island_result->getUses())) { + Value inner_op_result = std::get<0>(ret_vals); + Value island_result = std::get<1>(ret_vals); + for (auto& use : llvm::make_early_inc_range(island_result.getUses())) { if (child_body.findAncestorOpInBlock(*use.getOwner())) { // Forward result from inner op. use.set(inner_op_result); @@ -160,9 +160,9 @@ llvm::SmallVector GetNewIslandResultsAndForwardResults( for (auto ret_vals : llvm::zip(child.GetYield().getOperands(), child.outputs())) { - Value* inner_op_result = std::get<0>(ret_vals); - Value* island_result = std::get<1>(ret_vals); - if (!island_result->use_empty()) { + Value inner_op_result = std::get<0>(ret_vals); + Value island_result = std::get<1>(ret_vals); + if (!island_result.use_empty()) { results.emplace_back(inner_op_result, island_result); } } @@ -173,12 +173,12 @@ llvm::SmallVector GetNewIslandResultsAndForwardResults( // Creates the new merged island. IslandOp CreateNewIsland(IslandOp parent, IslandOp child, IslandType insert_position, - llvm::ArrayRef operands, + llvm::ArrayRef operands, llvm::ArrayRef results) { // Collect types from results. llvm::SmallVector result_types; for (const auto& result : results) - result_types.push_back(result.inner_op_result->getType()); + result_types.push_back(result.inner_op_result.getType()); // IslandOps always have a control result. 
result_types.push_back(ControlType::get(parent.getContext())); @@ -194,14 +194,14 @@ IslandOp CreateNewIsland(IslandOp parent, IslandOp child, // Creates respective YieldOp for the new merged island. YieldOp CreateNewIslandYieldOp(IslandOp new_island, llvm::ArrayRef results) { - llvm::SmallVector yield_operands; + llvm::SmallVector yield_operands; yield_operands.reserve(results.size()); for (auto ret_vals : llvm::zip(results, new_island.outputs())) { const auto& old_result = std::get<0>(ret_vals); // Replace original island result with new island result. - old_result.island_result->replaceAllUsesWith(std::get<1>(ret_vals)); + old_result.island_result.replaceAllUsesWith(std::get<1>(ret_vals)); // Add associated inner op result to operands of the YieldOp. yield_operands.push_back(old_result.inner_op_result); @@ -232,8 +232,7 @@ void MoveInnerOpsToNewIsland(IslandOp parent, IslandOp child, // Merges two islands and places new merged island before parent or child. void MergeIslands(IslandOp parent, IslandOp child, IslandType insert_position) { // Collect operands for the new merged island. - llvm::SmallSetVector operands = - GetNewIslandOperands(parent, child); + llvm::SmallSetVector operands = GetNewIslandOperands(parent, child); // Collect results for the new merged island. llvm::SmallVector results = @@ -250,8 +249,8 @@ void MergeIslands(IslandOp parent, IslandOp child, IslandType insert_position) { MoveInnerOpsToNewIsland(parent, child, new_yield_op.getOperation()); // Update control inputs to point to the new merged island. - child.control()->replaceAllUsesWith(new_island.control()); - parent.control()->replaceAllUsesWith(new_island.control()); + child.control().replaceAllUsesWith(new_island.control()); + parent.control().replaceAllUsesWith(new_island.control()); // Remove merged islands. child.erase(); @@ -288,15 +287,15 @@ bool MergeIslandWithResult(IslandOp parent) { // This allows our def-use based island coarsening algorithm to merge // islands that independently feed into a fetch. void InsertDummyIslandForFetch(FetchOp fetch) { - llvm::SmallVector data_fetches; + llvm::SmallVector data_fetches; llvm::SmallVector data_types; - llvm::SmallVector control_fetches; + llvm::SmallVector control_fetches; for (auto value : fetch.fetches()) { - if (value->getType().isa()) { + if (value.getType().isa()) { control_fetches.push_back(value); } else { data_fetches.push_back(value); - data_types.push_back(value->getType()); + data_types.push_back(value.getType()); } } auto island = OpBuilder(fetch).create( diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/fold_switch.cc b/tensorflow/compiler/mlir/tensorflow/transforms/fold_switch.cc index 52b425c4ee6..44309a5e019 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/fold_switch.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/fold_switch.cc @@ -30,24 +30,24 @@ limitations under the License. 
#include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" -#include "mlir/Analysis/LoopAnalysis.h" // TF:local_config_mlir -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Block.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/OperationSupport.h" // TF:local_config_mlir -#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/TypeUtilities.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir -#include "mlir/IR/Value.h" // TF:local_config_mlir -#include "mlir/IR/Visitors.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassRegistry.h" // TF:local_config_mlir -#include "mlir/Support/Functional.h" // TF:local_config_mlir -#include "mlir/Support/LLVM.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir +#include "mlir/Analysis/LoopAnalysis.h" // TF:llvm-project +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Block.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/OperationSupport.h" // TF:llvm-project +#include "mlir/IR/PatternMatch.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/TypeUtilities.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project +#include "mlir/IR/Value.h" // TF:llvm-project +#include "mlir/IR/Visitors.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassRegistry.h" // TF:llvm-project +#include "mlir/Support/Functional.h" // TF:llvm-project +#include "mlir/Support/LLVM.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/control_flow_ops.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" @@ -65,13 +65,13 @@ class SwitchFoldPass : public mlir::FunctionPass { } // namespace // Returns the defining op for a value looking through islands. -static Operation* GetDefiningOp(Value* val) { - Operation* op = val->getDefiningOp(); +static Operation* GetDefiningOp(Value val) { + Operation* op = val.getDefiningOp(); auto island_op = dyn_cast(op); if (!island_op) return op; auto yield_op = island_op.GetYield(); - auto index = cast(val)->getResultNumber(); - return yield_op.getOperand(index)->getDefiningOp(); + auto index = val.cast().getResultNumber(); + return yield_op.getOperand(index).getDefiningOp(); } // Returns either the value or input to an IdentityOp. @@ -81,7 +81,7 @@ static Operation* GetDefiningOp(Value* val) { // identity nodes are common so handle them specially when considering // predicate in a minimally invasive way until identity's are handled more // generally. -static Value* LookThroughIdentityOp(Value* pred_val) { +static Value LookThroughIdentityOp(Value pred_val) { if (!pred_val) return pred_val; auto op = GetDefiningOp(pred_val); if (auto id_op = dyn_cast(op)) pred_val = id_op.input(); @@ -114,7 +114,7 @@ class DeadQueue { // feeding into the Merge then we could have a null value here. 
count = 0; for (auto operand : op->getOperands()) { - if (operand && !operand->getType().isa()) + if (operand && !operand.getType().isa()) ++count; } } @@ -124,9 +124,9 @@ class DeadQueue { } // Enqueue users of a value. - void EnqueueUsers(Value* val) { - for (auto user : val->getUsers()) { - Enqueue(user, val->getType().isa()); + void EnqueueUsers(Value val) { + for (auto user : val.getUsers()) { + Enqueue(user, val.getType().isa()); } } @@ -175,7 +175,7 @@ class DeadQueue { // Enqueues values of foldable switch ops. static void MatchSwitchFoldOps(tf_executor::SwitchOp switch_op, DeadQueue* queue) { - Value* pred_val = LookThroughIdentityOp(switch_op.predicate()); + Value pred_val = LookThroughIdentityOp(switch_op.predicate()); // If predicate or input is null then enqueue entire op for deletion. if (pred_val == nullptr || switch_op.data() == nullptr) { @@ -187,9 +187,9 @@ static void MatchSwitchFoldOps(tf_executor::SwitchOp switch_op, if (!matchPattern(pred_val, m_Constant(&pred))) return; bool taken = pred.getSplatValue(); - Value* dead = taken ? switch_op.falseOutput() : switch_op.trueOutput(); - Value* live = !taken ? switch_op.falseOutput() : switch_op.trueOutput(); - live->replaceAllUsesWith(switch_op.data()); + Value dead = taken ? switch_op.falseOutput() : switch_op.trueOutput(); + Value live = !taken ? switch_op.falseOutput() : switch_op.trueOutput(); + live.replaceAllUsesWith(switch_op.data()); queue->EnqueueUsers(dead); // Delete switch op. @@ -210,15 +210,15 @@ static LogicalResult FoldMergeNodes(FuncOp function, const DeadQueue& queue) { for (auto it : queue.merge_nodes()) { // Find the valid input to merge node. - Value* val = nullptr; + Value val = nullptr; int index = -1; auto* merge = it.first; auto merge_op = cast(merge); for (auto e : llvm::enumerate(merge->getOperands())) { - Value* operand = e.value(); + Value operand = e.value(); if (!operand) continue; // Skip control operands. - if (operand->getType().isa()) break; + if (operand.getType().isa()) break; if (val != nullptr) { return merge->emitOpError("multiple valid inputs post switch folding"); } @@ -226,26 +226,26 @@ static LogicalResult FoldMergeNodes(FuncOp function, const DeadQueue& queue) { index = e.index(); } assert(val != nullptr && "merge node should have been deleted"); - merge_op.output()->replaceAllUsesWith(val); + merge_op.output().replaceAllUsesWith(val); // Build and insert value_index only if needed. - if (!merge_op.value_index()->use_empty()) { - merge_op.value_index()->replaceAllUsesWith( + if (!merge_op.value_index().use_empty()) { + merge_op.value_index().replaceAllUsesWith( build_index(merge->getLoc(), index)); } // Propagate control dependencies if used. - if (!merge_op.control()->use_empty()) { + if (!merge_op.control().use_empty()) { // Change control dependencies from the merge to being on the parent of // the value being propagated. 
- auto def_op = val->getDefiningOp(); + auto def_op = val.getDefiningOp(); #ifndef NDEBUG auto exec_dialect = function.getContext()->getRegisteredDialect("tf_executor"); assert(def_op->getDialect() == exec_dialect && "unable to forward control dependencies"); #endif - merge_op.control()->replaceAllUsesWith( + merge_op.control().replaceAllUsesWith( def_op->getResult(def_op->getNumResults() - 1)); } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_cfg.cc b/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_cfg.cc index e9b3879c025..6e713570f75 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_cfg.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_cfg.cc @@ -16,14 +16,14 @@ limitations under the License. // This transformation pass transforms functional control flow operations in the // standard TensorFlow dialect to MLIR Control Flow Graph (CFG) form. -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/TypeUtilities.h" // TF:local_config_mlir -#include "mlir/IR/Value.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassRegistry.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/TypeUtilities.h" // TF:llvm-project +#include "mlir/IR/Value.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassRegistry.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" @@ -48,12 +48,12 @@ struct FunctionalControlFlowToCFG // non-empty means True and empty means False. If the tensor is not a scalar, // being empty means False and being non-empty means True. // -static Value* LowerCondition(Location loc, Value* value, OpBuilder* builder) { +static Value LowerCondition(Location loc, Value value, OpBuilder* builder) { // TODO: Right now we just handle zero-D tensors of boolean values. // FIXME: This is almost all wrong, but is a placeholder to unblock the one // testcases, later patches will build on this once I build the right infra to // support it. - TensorType type = value->getType().cast(); + TensorType type = value.getType().cast(); if (!type.hasRank() || type.getRank() != 0 || !type.getElementType().isInteger(1)) { return emitError(loc, "only supports zero-D bool tensors now"), nullptr; @@ -70,17 +70,16 @@ static Value* LowerCondition(Location loc, Value* value, OpBuilder* builder) { // Requires the function to provide arguments for each of the `fn` operands // that is compatible for tensor cast. 
 //
-static Operation* CallFn(Location loc,
-                         const std::function<Value*(int)>& get_arg, FuncOp fn,
-                         OpBuilder* builder) {
+static Operation* CallFn(Location loc, const std::function<Value(int)>& get_arg,
+                         FuncOp fn, OpBuilder* builder) {
   FunctionType fn_type = fn.getType();
-  llvm::SmallVector<Value*, 4> operands;
+  llvm::SmallVector<Value, 4> operands;
   int num_operands = fn_type.getNumInputs();
   operands.reserve(num_operands);
   for (int i = 0; i < num_operands; ++i) {
-    Value* val = get_arg(i);
+    Value val = get_arg(i);
     Type expected = fn_type.getInput(i);
-    if (val->getType() != expected) {
+    if (val.getType() != expected) {
       val =
           builder->create<TF::CastOp>(loc, expected, val,
                                       /*Truncate=*/builder->getBoolAttr(false));
@@ -95,16 +94,16 @@ static Operation* CallFn(Location loc,
 //
 // Requires the function to provide values for each of the block arguments and
 // they should be pair-wise compatible for tensor cast.
-static llvm::SmallVector<Value*, 4> PrepareValsForJump(
-    Location loc, const std::function<Value*(int)>& get_val, Block* block,
+static llvm::SmallVector<Value, 4> PrepareValsForJump(
+    Location loc, const std::function<Value(int)>& get_val, Block* block,
     OpBuilder* builder) {
-  llvm::SmallVector<Value*, 4> result;
+  llvm::SmallVector<Value, 4> result;
   int num_vals = block->getNumArguments();
   result.reserve(num_vals);
   for (int i = 0; i < num_vals; ++i) {
-    Value* val = get_val(i);
-    Type expected = block->getArgument(i)->getType();
-    if (val->getType() != expected) {
+    Value val = get_val(i);
+    Type expected = block->getArgument(i).getType();
+    if (val.getType() != expected) {
       val =
           builder->create<TF::CastOp>(loc, expected, val,
                                       /*Truncate=*/builder->getBoolAttr(false));
@@ -119,7 +118,7 @@ static llvm::SmallVector<Value*, 4> PrepareValsForJump(
 //
 // Requires the function to provide values for each of the block arguments and
 // they should be pair-wise compatible for tensor cast.
-static void JumpToBlock(Location loc, const std::function<Value*(int)>& get_arg,
+static void JumpToBlock(Location loc, const std::function<Value(int)>& get_arg,
                         Block* block, OpBuilder* builder) {
   auto operands = PrepareValsForJump(loc, get_arg, block, builder);
   builder->create<BranchOp>(loc, block, operands);
@@ -136,14 +135,14 @@ static void ReplaceOpResultWithBlockArgs(Location loc, Operation* op,
                                          Block* block, OpBuilder* builder) {
   assert(op->getNumResults() == block->getNumArguments());
   for (unsigned i = 0, e = op->getNumResults(); i != e; ++i) {
-    Value* arg = block->getArgument(i);
-    Value* result = op->getResult(i);
-    if (arg->getType() != result->getType()) {
+    Value arg = block->getArgument(i);
+    Value result = op->getResult(i);
+    if (arg.getType() != result.getType()) {
       arg =
-          builder->create<TF::CastOp>(loc, result->getType(), arg,
+          builder->create<TF::CastOp>(loc, result.getType(), arg,
                                       /*Truncate=*/builder->getBoolAttr(false));
     }
-    result->replaceAllUsesWith(arg);
+    result.replaceAllUsesWith(arg);
   }
 }

@@ -160,7 +159,7 @@ static LogicalResult LowerIfOp(IfOp op) {
   OpBuilder builder(op_inst);

   // Lower the condition to a boolean value (i1).
-  Value* cond_i1 = LowerCondition(loc, op.cond(), &builder);
+  Value cond_i1 = LowerCondition(loc, op.cond(), &builder);
   if (!cond_i1) return failure();

   auto module = op_inst->getParentOfType<ModuleOp>();
@@ -174,8 +173,8 @@ static LogicalResult LowerIfOp(IfOp op) {
   // Add the block arguments to the merge point, and replace all uses of the
   // original operation results with them.
-  for (Value* value : op_inst->getResults())
-    merge_block->addArgument(value->getType());
+  for (Value value : op_inst->getResults())
+    merge_block->addArgument(value.getType());

   ReplaceOpResultWithBlockArgs(loc, op_inst, merge_block, &builder);

   // Get arguments to the branches after dropping the condition which is the
@@ -200,8 +199,8 @@ static LogicalResult LowerIfOp(IfOp op) {
   // orig_block with a conditional branch.
   builder.setInsertionPointToEnd(orig_block);
   builder.create<CondBranchOp>(loc, cond_i1, then_block,
-                               llvm::ArrayRef<Value*>(), else_block,
-                               llvm::ArrayRef<Value*>());
+                               llvm::ArrayRef<Value>(), else_block,
+                               llvm::ArrayRef<Value>());

   // Finally, delete the op in question.
   op_inst->erase();
@@ -277,7 +276,7 @@ static LogicalResult LowerWhileOp(WhileOp op) {
   Operation* cond_call_op = CallFn(loc, get_cond_arg, cond_fn, &builder);

   assert(cond_call_op->getNumResults() == 1);
-  Value* condition = LowerCondition(loc, cond_call_op->getResult(0), &builder);
+  Value condition = LowerCondition(loc, cond_call_op->getResult(0), &builder);
   auto br_operands =
       PrepareValsForJump(loc, get_cond_arg, body_block, &builder);
   builder.create<CondBranchOp>(loc, condition, body_block, br_operands,
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/graph_pruning.cc b/tensorflow/compiler/mlir/tensorflow/transforms/graph_pruning.cc
index 23cdebc4323..c7dac93101b 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/graph_pruning.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/graph_pruning.cc
@@ -18,10 +18,10 @@ limitations under the License.
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Casting.h"
-#include "mlir/IR/Operation.h"  // TF:local_config_mlir
-#include "mlir/IR/Value.h"  // TF:local_config_mlir
-#include "mlir/Pass/Pass.h"  // TF:local_config_mlir
-#include "mlir/Pass/PassRegistry.h"  // TF:local_config_mlir
+#include "mlir/IR/Operation.h"  // TF:llvm-project
+#include "mlir/IR/Value.h"  // TF:llvm-project
+#include "mlir/Pass/Pass.h"  // TF:llvm-project
+#include "mlir/Pass/PassRegistry.h"  // TF:llvm-project
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h"
 #include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h"

@@ -38,8 +38,8 @@ void PruneGraph(GraphOp graph) {

   // Visit an op's operands if it is output of an Operation in same graph.
   auto visit_op = [&](Operation* op) {
-    for (Value* operand : op->getOperands()) {
-      Operation* def = operand->getDefiningOp();
+    for (Value operand : op->getOperands()) {
+      Operation* def = operand.getDefiningOp();
       if (def && def->getParentOp() == graph &&
           reachable_ops.insert(def).second) {
         // Op has not been visited, add to queue to visit later.
@@ -86,36 +86,17 @@ namespace {

 // This transformation pass prunes a TF graph eliminating dead-nodes.
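The pruning performed by the pass defined just below is plain backward reachability: start from the ops feeding the graph's fetches and walk to defining ops; anything never visited is dead. A self-contained C++ sketch of the same worklist idea, with toy integer node ids standing in for MLIR operations:

#include <unordered_map>
#include <unordered_set>
#include <vector>

// Each node maps to the nodes it reads from (its operands' defining nodes).
using Defs = std::unordered_map<int, std::vector<int>>;

// Walks backwards from the fetch targets; any node never inserted into the
// reachable set is dead and may be erased, mirroring PruneGraph's
// reachable_ops set and visit queue.
std::unordered_set<int> ReachableFromFetches(const Defs& defs,
                                             std::vector<int> queue) {
  std::unordered_set<int> reachable(queue.begin(), queue.end());
  while (!queue.empty()) {
    int node = queue.back();
    queue.pop_back();
    auto it = defs.find(node);
    if (it == defs.end()) continue;
    for (int def : it->second)
      if (reachable.insert(def).second)  // first visit: enqueue
        queue.push_back(def);
  }
  return reachable;
}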
 struct GraphPruning : public FunctionPass<GraphPruning> {
   void runOnFunction() override {
-    FuncOp func = getFunction();
-    if (func.getName() == "main" && skip_main_func) return;
-    func.walk([](tf_executor::GraphOp graph) { PruneGraph(graph); });
+    getFunction().walk([](tf_executor::GraphOp graph) { PruneGraph(graph); });
   }
-
-  struct Options : public PassOptions<Options> {
-    Option<bool> skip_main_func{
-        *this, "skip-main-func",
-        llvm::cl::desc("skip graph pruning for main function"),
-        llvm::cl::init(false)};
-  };
-
-  explicit GraphPruning(bool skip_main_func)
-      : FunctionPass<GraphPruning>(), skip_main_func(skip_main_func) {}
-
-  explicit GraphPruning(const Options& option)
-      : GraphPruning(option.skip_main_func) {}
-
- private:
-  bool skip_main_func;
 };

 }  // namespace

-std::unique_ptr<OpPassBase<FuncOp>> CreateTFExecutorGraphPruningPass(
-    bool skip_main_func) {
-  return std::make_unique<GraphPruning>(skip_main_func);
+std::unique_ptr<OpPassBase<FuncOp>> CreateTFExecutorGraphPruningPass() {
+  return std::make_unique<GraphPruning>();
 }

-static PassRegistration<GraphPruning, GraphPruning::Options> pass(
+static PassRegistration<GraphPruning> pass(
     "tf-executor-graph-pruning",
     "Prune unreachable nodes in a TensorFlow Graph.");
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/inline_global_tensors.cc b/tensorflow/compiler/mlir/tensorflow/transforms/inline_global_tensors.cc
index c994ccf498b..6d780d08d6b 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/inline_global_tensors.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/inline_global_tensors.cc
@@ -24,10 +24,10 @@ limitations under the License.
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Sequence.h"
-#include "mlir/IR/Attributes.h"  // TF:local_config_mlir
-#include "mlir/IR/Builders.h"  // TF:local_config_mlir
-#include "mlir/IR/Module.h"  // TF:local_config_mlir
-#include "mlir/Pass/Pass.h"  // TF:local_config_mlir
+#include "mlir/IR/Attributes.h"  // TF:llvm-project
+#include "mlir/IR/Builders.h"  // TF:llvm-project
+#include "mlir/IR/Module.h"  // TF:llvm-project
+#include "mlir/Pass/Pass.h"  // TF:llvm-project
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h"
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.h"

@@ -55,7 +55,7 @@ void InlineGlobalTensorsPass::runOnModule() {
     // Replace the arg with a tf.Const op in the function body.
     auto const_op = builder.create<TF::ConstOp>(global_tensor.getLoc(),
                                                 global_tensor.value());
-    func.getArgument(i)->replaceAllUsesWith(const_op.getResult());
+    func.getArgument(i).replaceAllUsesWith(const_op.getResult());
     args_to_erase.push_back(i);
   }
   func.eraseArguments(args_to_erase);
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc b/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc
index e06831ceb21..e9434ab4d5d 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc
@@ -17,12 +17,14 @@ limitations under the License.
 #include <numeric>

-#include "mlir/IR/Attributes.h"  // TF:local_config_mlir
-#include "mlir/IR/Diagnostics.h"  // TF:local_config_mlir
-#include "mlir/IR/MLIRContext.h"  // TF:local_config_mlir
-#include "mlir/IR/PatternMatch.h"  // TF:local_config_mlir
-#include "mlir/IR/StandardTypes.h"  // TF:local_config_mlir
-#include "mlir/IR/TypeUtilities.h"  // TF:local_config_mlir
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "mlir/IR/Attributes.h"  // TF:llvm-project
+#include "mlir/IR/Diagnostics.h"  // TF:llvm-project
+#include "mlir/IR/MLIRContext.h"  // TF:llvm-project
+#include "mlir/IR/PatternMatch.h"  // TF:llvm-project
+#include "mlir/IR/StandardTypes.h"  // TF:llvm-project
+#include "mlir/IR/TypeUtilities.h"  // TF:llvm-project
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h"
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h"
 #include "tensorflow/core/util/tensor_format.h"
@@ -67,6 +69,14 @@ static DenseElementsAttr GetScalarOfType(Type ty, int64_t raw_value) {
   return DenseElementsAttr::get(scalar_ty, attr);
 }

+// Returns float DenseElementsAttr with scalar shape with the specified value.
+static DenseElementsAttr GetScalarOfFloatType(Type ty, double raw_value) {
+  auto float_ty = ty.cast<FloatType>();
+  FloatAttr attr = FloatAttr::get(float_ty, raw_value);
+  RankedTensorType scalar_ty = RankedTensorType::get({}, ty);
+  return DenseElementsAttr::get(scalar_ty, attr);
+}
+
 // Returns reduction indices to use while lowering tf.BiasAddGrad op to tf.Sum
 // op.
 DenseIntElementsAttr GetBiasAddGradReductionIndices(int64_t rank,
@@ -124,8 +134,8 @@ class LowerAddNOp : public OpRewritePattern<AddNOp> {

     // TODO(hinsu): Improve parallelism by splitting operands in two halves and
     // accumulating them first.
-    Value *result = *op.inputs().begin();
-    for (Value *operand : llvm::drop_begin(op.inputs(), 1)) {
+    Value result = *op.inputs().begin();
+    for (Value operand : llvm::drop_begin(op.inputs(), 1)) {
       result = rewriter.create<AddV2Op>(op.getLoc(), result, operand);
     }

@@ -134,6 +144,101 @@ class LowerAddNOp : public OpRewritePattern<AddNOp> {
   }
 };

+// Lowers DynamicStitch op with constant indices and with static input and
+// output shapes using Reshape, UnPack and ConcatV2 op.
+//
+//   %indices0 = "tf.Const"() {value = dense<4> : tensor<i32>}
+//   %indices1 = "tf.Const"() {value = dense<[[3, 2], [1, 0]]> :
+//   tensor<2x2xi32>}
+//   %0 = "tf.DynamicStitch"(%indices0, %indices1, %arg0, %arg1)
+//     : (tensor<i32>, tensor<2x2xi32>, tensor<2xf32>, tensor<2x2x2xf32>)
+//     -> tensor<5x2xf32>
+//
+// is lowered to
+//
+//   %shape = "tf.Const"() {value = dense<[-1, 2]> : tensor<2xi64>}
+//   %inp0 = "tf.Reshape"(%arg0, %shape)
+//     : (tensor<2xf32>, tensor<2xi64>) -> tensor<1x2xf32>
+//   %inp1 = "tf.Reshape"(%arg1, %shape)
+//     : (tensor<2x2x2xf32>, tensor<2xi64>) -> tensor<4x2xf32>
+//   %items0 = "tf.Unpack"(%[[INP0]]) {axis = 0 : i64}
+//     : (tensor<1x2xf32>) -> tensor<2xf32>
+//   %items1:4 = "tf.Unpack"(%[[INP1]]) {axis = 0 : i64}
+//     : (tensor<4x2xf32>) -> (tensor<2xf32>, tensor<2xf32>, tensor<2xf32>,
+//     tensor<2xf32>)
+//   %axis = "tf.Const"() {value = dense<0> : tensor<i64>}
+//   %0 = "tf.ConcatV2"(items1#3, items1#2, items1#1, items1#0, %items0, %axis)
+//     : (tensor<2xf32>, tensor<2xf32>, tensor<2xf32>, tensor<2xf32>,
+//     tensor<2xf32>, tensor<i64>) -> tensor<5x2xf32>
+//
+class LowerDynamicStitchOp : public OpRewritePattern<DynamicStitchOp> {
+ public:
+  explicit LowerDynamicStitchOp(MLIRContext *context)
+      : OpRewritePattern<DynamicStitchOp>(context) {}
+
+  PatternMatchResult matchAndRewrite(DynamicStitchOp op,
+                                     PatternRewriter &rewriter) const override {
+    // Static output type is used to compute intermediate values. Note that the
+    // output type doesn't have to be static but if input types and indices are
+    // constant, then the output type can be statically determined.
+    RankedTensorType out_ty = op.getType().dyn_cast<RankedTensorType>();
+    if (!out_ty || !out_ty.hasStaticShape()) return matchFailure();
+
+    // Extract out all the constant indices' attributes and verify that data
+    // types are static.
+    SmallVector<DenseIntElementsAttr, 4> indices;
+    indices.reserve(op.N());
+    for (auto it : llvm::zip(op.indices(), op.data())) {
+      Value index = std::get<0>(it);
+      Value data = std::get<1>(it);
+
+      DenseIntElementsAttr index_attr;
+      if (!matchPattern(index, m_Constant(&index_attr))) return matchFailure();
+      indices.push_back(index_attr);
+
+      RankedTensorType data_ty = data.getType().dyn_cast<RankedTensorType>();
+      if (!data_ty || !data_ty.hasStaticShape()) return matchFailure();
+    }
+
+    // Compute type of each of the items and shape to use while reshaping
+    // inputs so that they can be unpacked to extract out individual items.
+    ArrayRef<int64_t> item_shape = out_ty.getShape().drop_front(1);
+    auto item_ty = RankedTensorType::get(item_shape, out_ty.getElementType());
+
+    SmallVector<int64_t, 4> packed_shape;
+    packed_shape.push_back(-1);
+    packed_shape.append(item_shape.begin(), item_shape.end());
+    Location loc = op.getLoc();
+    auto packed_shape_val = rewriter.create<ConstOp>(
+        loc, GetI64ElementsAttr(packed_shape, &rewriter));
+
+    // Prepare each of the output item by unpacking data and then putting it to
+    // the specified index.
+    SmallVector<Value, 8> values(out_ty.getDimSize(0));
+    for (auto it : llvm::zip(indices, op.data())) {
+      DenseIntElementsAttr index_attr = std::get<0>(it);
+      Value data = std::get<1>(it);
+
+      auto reshaped_data =
+          rewriter.create<ReshapeOp>(loc, data, packed_shape_val);
+      auto num_items =
+          reshaped_data.getType().cast<RankedTensorType>().getShape()[0];
+      auto items = rewriter.create<UnpackOp>(
+          loc, SmallVector<Type, 4>(num_items, item_ty), reshaped_data,
+          /*axis=*/APInt(64, 0));
+      for (auto index_item : llvm::zip(index_attr, items.getResults())) {
+        int64_t output_index = std::get<0>(index_item).getSExtValue();
+        Value item = std::get<1>(index_item);
+        values[output_index] = item;
+      }
+    }
+
+    auto axis = rewriter.create<ConstOp>(loc, rewriter.getI64IntegerAttr(0));
+    rewriter.replaceOpWithNewOp<ConcatV2Op>(op, op.getType(), values, axis);
+    return matchSuccess();
+  }
+};
+
 // Lowers Pack op to ConcatV2 op after changing shape of the inputs with
 // ExpandDims op.
 //
@@ -159,13 +264,13 @@ class LowerPackOp : public OpRewritePattern<PackOp> {
     int64_t axis = op.axis().getSExtValue();
     Type prev_input_ty, inferred_ty;
-    SmallVector<Value *, 4> expanded_inputs;
+    SmallVector<Value, 4> expanded_inputs;
     expanded_inputs.reserve(op.N());
-    for (Value *input : op.values()) {
+    for (Value input : op.values()) {
       // If input type is different than the previous input type, infer the
       // output type. Otherwise, use the already inferred output type from the
       // previous iteration.
-      Type input_ty = input->getType();
+      Type input_ty = input.getType();
       if (input_ty != prev_input_ty) {
         inferred_ty = InferExpandDimsType(input_ty, axis, &rewriter);
         prev_input_ty = input_ty;
@@ -184,8 +289,7 @@ class LowerPackOp : public OpRewritePattern<PackOp> {

 void PopulateLoweringTFPatterns(MLIRContext *context,
                                 OwningRewritePatternList *patterns) {
-  patterns->insert<LowerAddNOp>(context);
-  patterns->insert<LowerPackOp>(context);
+  patterns->insert<LowerAddNOp, LowerDynamicStitchOp, LowerPackOp>(context);
   populateWithGenerated(context, patterns);
 }
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.h b/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.h
index 4b85ac3b46a..b72b0f25938 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.h
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.h
@@ -16,8 +16,8 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSFORMS_LOWER_TF_H_
 #define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSFORMS_LOWER_TF_H_

-#include "mlir/IR/MLIRContext.h"  // TF:local_config_mlir
-#include "mlir/IR/PatternMatch.h"  // TF:local_config_mlir
+#include "mlir/IR/MLIRContext.h"  // TF:llvm-project
+#include "mlir/IR/PatternMatch.h"  // TF:llvm-project

 namespace mlir {
 namespace TF {
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.td b/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.td
index 069bc07f4a1..ec0ac5e3c1e 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.td
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.td
@@ -21,13 +21,23 @@ include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td"
 class GetScalarOfType<int value> : NativeCodeCall<
     "GetScalarOfType(getElementTypeOrSelf($0)," # value # ")">;

+class GetScalarOfFloatType<string value> : NativeCodeCall<
+    "GetScalarOfFloatType(getElementTypeOrSelf($0)," # value # ")">;
+
+def GetScalarNanOfType : NativeCodeCall<
+    "GetScalarOfFloatType(getElementTypeOrSelf($0), "
+    "std::numeric_limits<double>::quiet_NaN())">;
+
+class GetI64ScalarElementsAttr<int value> :
+  NativeCodeCall<"GetI64ElementsAttr({" # value # "}, &$_builder)">;
+
 //===----------------------------------------------------------------------===//
 // BiasAddGrad op patterns.
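For reference, the net semantics the LowerDynamicStitchOp pattern above reproduces can be modeled in a few lines of standalone C++. This is a toy model, not the pattern itself: nested vectors stand in for tensors, rows are the "items" the lowering unpacks, and num_rows is assumed to equal the stitched output's first dimension:

#include <cstddef>
#include <vector>

// indices[i][j] says where row j of input i lands in the stitched output,
// exactly as in the comment example above; later inputs overwrite earlier
// ones when indices repeat.
std::vector<std::vector<float>> DynamicStitch(
    const std::vector<std::vector<int>>& indices,
    const std::vector<std::vector<std::vector<float>>>& data,
    std::size_t num_rows) {
  std::vector<std::vector<float>> out(num_rows);
  for (std::size_t i = 0; i < indices.size(); ++i)
    for (std::size_t j = 0; j < indices[i].size(); ++j)
      out[indices[i][j]] = data[i][j];
  return out;
}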
 //===----------------------------------------------------------------------===//

 def GetBiasAddGradReductionIndices : NativeCodeCall<
     "GetBiasAddGradReductionIndices("
-    "$0->getType().cast<RankedTensorType>().getRank(), $1, &$_builder)">;
+    "$0.getType().cast<RankedTensorType>().getRank(), $1, &$_builder)">;

 def LowerBiasAddGradOp :
   Pat<(TF_BiasAddGradOp AnyRankedTensor:$out_backprop, $data_format),
@@ -56,19 +66,57 @@ def LowerBiasAddGradOp :
 // TODO(hinsu): Support scalar inputs by introducing reshape to 1D.
 def NonScalarType : Type<Neg<HasAnyRankOfPred<[0]>>, "Non scalar type">;

-def GetLastDimReductionAxis :
-    NativeCodeCall<"GetI64ElementsAttr({-1}, &$_builder)">;
-
 def LowerSoftmaxCrossEntropyWithLogitsOp : Pattern<
   (TF_SoftmaxCrossEntropyWithLogitsOp AnyRankedTensor:$features,
                                       AnyRankedTensor:$labels),
   [(TF_SumOp (TF_MulOp:$sum_input (TF_NegOp $labels),
                                   (TF_LogSoftmaxOp $features)),
-             (TF_ConstOp (GetLastDimReductionAxis)),
+             (TF_ConstOp (GetI64ScalarElementsAttr<-1>)),
              /*keep_dims=*/ConstBoolAttrFalse),
    (TF_SubOp (TF_SoftmaxOp $features), $labels)],
   [(NonScalarType $features), (NonScalarType $labels)]>;

+// Returns size of the specified dimension as scalar elements attribute of
+// type $1.
+// Requires $0 to be of RankedTensorType with rank greater than `dim` and the
+// dimension should be known.
+class GetDimSizeOfType<int dim> : NativeCodeCall<
+    "GetScalarOfType(getElementTypeOrSelf($1), "
+    "$0.getType().cast<RankedTensorType>().getDimSize(" # dim # "))">;
+
+// Same as the above with i32 element type.
+class GetDimSizeAsI32<int dim> : NativeCodeCall<
+    "GetScalarOfType($_builder.getIntegerType(32), "
+    "$0.getType().cast<RankedTensorType>().getDimSize(" # dim # "))">;
+
+// Sparse version of SoftmaxCrossEntropyWithLogits is lowered to dense by
+// expanding the sparse labels using:
+//
+//   labels = OneHotOp(sparse_labels, depth, 1.0, 0.0)
+//
+// If any of the indices are out of range, we must populate the labels with
+// NaNs to follow the semantics of the op.
+def LowerSparseSoftmaxCrossEntropyWithLogitsOp : Pattern<
+  (TF_SparseSoftmaxCrossEntropyWithLogitsOp:$src_op
+    AnyStaticShapeTensor:$features, $sparse_labels),
+  [(TF_OneHotOp:$labels $sparse_labels,
+     (TF_ConstOp (GetDimSizeAsI32<1> $features, $src_op__0)),
+     (TF_ConstOp (GetScalarOfType<1> $features)),
+     (TF_ConstOp (GetScalarOfType<0> $features)),
+     ConstantAttr<I64Attr, "-1">),
+   (TF_SelectV2Op:$zero_or_nan
+     (TF_LogicalAndOp
+       (TF_LessEqualOp
+         (TF_ConstOp (GetScalarOfType<0> $sparse_labels)), $sparse_labels),
+       (TF_LessOp $sparse_labels,
+         (TF_ConstOp (GetDimSizeOfType<1> $features, $sparse_labels)))),
+     (TF_ConstOp (GetScalarOfType<0> $features)),
+     (TF_ConstOp (GetScalarNanOfType $labels))),
+   (TF_AddV2Op:$adjusted_labels $labels,
+     (TF_ExpandDimsOp $zero_or_nan,
+       (TF_ConstOp (GetI64ScalarElementsAttr<-1>)))),
+   (TF_SoftmaxCrossEntropyWithLogitsOp $features, $adjusted_labels)]>;
+
 //===----------------------------------------------------------------------===//
 // Difference op patterns.
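The NaN handling in the sparse pattern above is easiest to see outside tblgen. A standalone C++ sketch of the label expansion, assuming ordinary IEEE float semantics: labels in [0, depth) become one-hot rows, and an out-of-range label poisons its entire row with NaN so the downstream cross-entropy also produces NaN, matching the op's semantics:

#include <cstddef>
#include <limits>
#include <vector>

std::vector<std::vector<float>> ExpandSparseLabels(
    const std::vector<int>& sparse_labels, int depth) {
  std::vector<std::vector<float>> dense(sparse_labels.size(),
                                        std::vector<float>(depth, 0.0f));
  for (std::size_t i = 0; i < sparse_labels.size(); ++i) {
    int label = sparse_labels[i];
    if (label >= 0 && label < depth) {
      dense[i][label] = 1.0f;  // in range: ordinary one-hot row
    } else {
      for (float& v : dense[i])  // out of range: NaN poisons the whole row
        v = std::numeric_limits<float>::quiet_NaN();
    }
  }
  return dense;
}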
 //===----------------------------------------------------------------------===//

@@ -112,7 +160,7 @@ def LowerFillOp : Pat<(TF_FillOp $dims, $value),

 def GetAllAxes : NativeCodeCall<
     "GetI64ElementsAttrForSeq("
-    "0, $0->getType().cast<RankedTensorType>().getRank(), &$_builder)">;
+    "0, $0.getType().cast<RankedTensorType>().getRank(), &$_builder)">;

 // L2Loss is lowered using the formula,
 // L2Loss(input) = Sum(input * input) / 2
@@ -135,6 +183,14 @@ def : Pat<(TF_PadOp TensorOf<[AnyInteger, AnyFloat]>:$input, $paddings),
           (TF_PadV2Op $input, $paddings,
                       (TF_ConstOp (GetScalarOfType<0> $input)))>;

+//===----------------------------------------------------------------------===//
+// Reciprocal op patterns.
+//===----------------------------------------------------------------------===//
+
+// TODO(hinsu): Support complex and unsigned input types.
+def LowerReciprocal : Pat<(TF_ReciprocalOp TF_SintOrFpTensor:$x),
+                          (TF_DivOp (TF_ConstOp (GetScalarOfType<1> $x)), $x)>;
+
 //===----------------------------------------------------------------------===//
 // Rsqrt op patterns.
 //===----------------------------------------------------------------------===//
@@ -164,7 +220,7 @@ def LowerTanhGradOp :
 //===----------------------------------------------------------------------===//

 def CreateTFShapeOp : NativeCodeCall<
-    "$_builder.create<TF::ShapeOp>($0->getLoc(), $1, $2)">;
+    "$_builder.create<TF::ShapeOp>($0.getLoc(), $1, $2)">;

 // TODO(hinsu): Support inputs of TensorList types.
 def LowerZerosLikeOp :
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf_pass.cc b/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf_pass.cc
index 309d0147bc0..be9e0f4aef4 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf_pass.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf_pass.cc
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/

-#include "mlir/IR/PatternMatch.h"  // TF:local_config_mlir
-#include "mlir/Pass/Pass.h"  // TF:local_config_mlir
+#include "mlir/IR/PatternMatch.h"  // TF:llvm-project
+#include "mlir/Pass/Pass.h"  // TF:llvm-project
 #include "tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.h"

 namespace mlir {
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/materialize_mlir_passthrough_op.cc b/tensorflow/compiler/mlir/tensorflow/transforms/materialize_mlir_passthrough_op.cc
index 58dfab15d34..f9a459647c8 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/materialize_mlir_passthrough_op.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/materialize_mlir_passthrough_op.cc
@@ -17,16 +17,16 @@ limitations under the License.
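Two of the formulas in the patterns above are simple enough to check directly. Assuming ordinary float arithmetic, Reciprocal(x) lowers to Div(Const(1), x) and L2Loss(input) = Sum(input * input) / 2; a trivial standalone C++ restatement:

#include <vector>

// Mirrors LowerReciprocal: 1 / x via a division by a splat constant one.
float Reciprocal(float x) { return 1.0f / x; }

// Mirrors the L2Loss formula quoted in the comment above.
float L2Loss(const std::vector<float>& input) {
  float sum = 0.0f;
  for (float v : input) sum += v * v;
  return sum / 2.0f;
}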
#include "llvm/ADT/STLExtras.h" #include "llvm/Support/Casting.h" -#include "mlir/IR/Block.h" // TF:local_config_mlir -#include "mlir/IR/Diagnostics.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/IR/OpDefinition.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir -#include "mlir/IR/Value.h" // TF:local_config_mlir -#include "mlir/Parser.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassRegistry.h" // TF:local_config_mlir +#include "mlir/IR/Block.h" // TF:llvm-project +#include "mlir/IR/Diagnostics.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/IR/OpDefinition.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project +#include "mlir/IR/Value.h" // TF:llvm-project +#include "mlir/Parser.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassRegistry.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #define DEBUG_TYPE "tf-materialize-passthrough-op" @@ -79,7 +79,7 @@ void MaterializePassthroughOpPass::runOnFunction() { Block &block = body.front(); for (const auto &arg_mapping : llvm::zip(block.getArguments(), op->getOperands())) { - std::get<0>(arg_mapping)->replaceAllUsesWith(std::get<1>(arg_mapping)); + std::get<0>(arg_mapping).replaceAllUsesWith(std::get<1>(arg_mapping)); } op->getBlock()->getOperations().splice(op->getIterator(), block.getOperations(), block.begin(), @@ -87,7 +87,7 @@ void MaterializePassthroughOpPass::runOnFunction() { Operation &return_op = block.front(); for (auto ret_mapping : llvm::zip(op->getResults(), return_op.getOperands())) { - std::get<0>(ret_mapping)->replaceAllUsesWith(std::get<1>(ret_mapping)); + std::get<0>(ret_mapping).replaceAllUsesWith(std::get<1>(ret_mapping)); } op->erase(); }); diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/optimize.cc b/tensorflow/compiler/mlir/tensorflow/transforms/optimize.cc index 6e28b19ad80..a52b30e2fd2 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/optimize.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/optimize.cc @@ -14,14 +14,14 @@ limitations under the License. 
 ==============================================================================*/

 #include <memory>

-#include "mlir/Dialect/StandardOps/Ops.h"  // TF:local_config_mlir
-#include "mlir/IR/Attributes.h"  // TF:local_config_mlir
-#include "mlir/IR/Builders.h"  // TF:local_config_mlir
-#include "mlir/IR/Operation.h"  // TF:local_config_mlir
-#include "mlir/IR/PatternMatch.h"  // TF:local_config_mlir
-#include "mlir/Pass/Pass.h"  // TF:local_config_mlir
-#include "mlir/Pass/PassManager.h"  // TF:local_config_mlir
-#include "mlir/Transforms/Passes.h"  // TF:local_config_mlir
+#include "mlir/Dialect/StandardOps/Ops.h"  // TF:llvm-project
+#include "mlir/IR/Attributes.h"  // TF:llvm-project
+#include "mlir/IR/Builders.h"  // TF:llvm-project
+#include "mlir/IR/Operation.h"  // TF:llvm-project
+#include "mlir/IR/PatternMatch.h"  // TF:llvm-project
+#include "mlir/Pass/Pass.h"  // TF:llvm-project
+#include "mlir/Pass/PassManager.h"  // TF:llvm-project
+#include "mlir/Transforms/Passes.h"  // TF:llvm-project
 #include "tensorflow/compiler/mlir/lite/utils/validators.h"
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h"
 #include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h"
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/optimize.td b/tensorflow/compiler/mlir/tensorflow/transforms/optimize.td
index 6c11067ce7a..5681b78882a 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/optimize.td
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/optimize.td
@@ -21,7 +21,7 @@ def BroadcastableElements : Constraint<CPred<
 def F32ElementsAttr : ElementsAttrBase<
   CPred<"$_self.cast<DenseElementsAttr>().getType().getElementType().isF32()">, "float constant tensor">;
-def DefinedByConv2D : Constraint<CPred<"llvm::isa_and_nonnull<TF::Conv2DOp>($0->getDefiningOp())">>;
+def DefinedByConv2D : Constraint<CPred<"llvm::isa_and_nonnull<TF::Conv2DOp>($0.getDefiningOp())">>;

 // If we see a Conv2D op followed by Mul, then multiply the filter
 // with the value in Mul.
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/optimize_global_tensors.cc b/tensorflow/compiler/mlir/tensorflow/transforms/optimize_global_tensors.cc
index e7acbb334ea..40f084af46b 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/optimize_global_tensors.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/optimize_global_tensors.cc
@@ -20,9 +20,9 @@ limitations under the License.

 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
-#include "mlir/IR/Builders.h"  // TF:local_config_mlir
-#include "mlir/IR/Module.h"  // TF:local_config_mlir
-#include "mlir/Pass/Pass.h"  // TF:local_config_mlir
+#include "mlir/IR/Builders.h"  // TF:llvm-project
+#include "mlir/IR/Module.h"  // TF:llvm-project
+#include "mlir/Pass/Pass.h"  // TF:llvm-project
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h"
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.h"

@@ -52,9 +52,9 @@ using GlobalTensorUsesMap =
 // be keep in sync.
 bool IsReadOnlyVariableOp(Operation* op) { return isa<TF::ReadVariableOp>(op); }

-void RewriteReadOnlyVariableOpToTensorOp(Operation* op, Value* tensor_value) {
+void RewriteReadOnlyVariableOpToTensorOp(Operation* op, Value tensor_value) {
   auto read_variable = cast<TF::ReadVariableOp>(op);
-  read_variable.value()->replaceAllUsesWith(tensor_value);
+  read_variable.value().replaceAllUsesWith(tensor_value);
 }

 bool IsFreezable(GlobalTensorOp global_tensor,
@@ -73,8 +73,8 @@ bool IsFreezable(GlobalTensorOp global_tensor,
   // func for tf.ReadVariableOp. If the resource is passed into other functions
   // or control flow, we fail to prove it is freezable even though we could.
   for (auto& global_tensor_use : global_tensor_uses) {
-    auto* arg = global_tensor_use.func.getArgument(global_tensor_use.arg_index);
-    for (auto user : arg->getUsers()) {
+    auto arg = global_tensor_use.func.getArgument(global_tensor_use.arg_index);
+    for (auto user : arg.getUsers()) {
       if (!IsReadOnlyVariableOp(user)) {
         return false;
       }
@@ -129,13 +129,13 @@ void FreezeGlobalTensors(ModuleOp module,
     for (auto global_tensor_use : global_tensor_uses) {
       auto func = global_tensor_use.func;
       auto arg_index = global_tensor_use.arg_index;
-      Value* arg = func.getArgument(arg_index);
-      for (Operation* user : llvm::make_early_inc_range(arg->getUsers())) {
+      Value arg = func.getArgument(arg_index);
+      for (Operation* user : llvm::make_early_inc_range(arg.getUsers())) {
         RewriteReadOnlyVariableOpToTensorOp(user, arg);
         user->erase();
       }
       Type new_type = global_tensor.value().Attribute::getType();
-      arg->setType(new_type);
+      arg.setType(new_type);
       auto old_ftype = func.getType();
       auto input_types = old_ftype.getInputs().vec();
       input_types[arg_index] = new_type;
@@ -168,7 +168,7 @@ void EraseUnusedBoundInputs(ModuleOp module) {
     SmallVector<unsigned, 4> args_to_erase;
     for (int i = 0, e = func.getNumArguments(); i < e; i++) {
       if (func.getArgAttr(i, "tf_saved_model.bound_input") &&
-          func.getArgument(i)->use_empty()) {
+          func.getArgument(i).use_empty()) {
         args_to_erase.push_back(i);
       }
     }
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h
index c9c97735848..180e87eba46 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h
@@ -18,7 +18,7 @@ limitations under the License.

 #include <memory>

-#include "mlir/Pass/Pass.h"  // TF:local_config_mlir
+#include "mlir/Pass/Pass.h"  // TF:llvm-project

 namespace mlir {

@@ -46,7 +46,8 @@ std::unique_ptr<OpPassBase<FuncOp>> CreateTFShapeInferencePass();
 // Optimizes Tensorflow graph.
 std::unique_ptr<OpPassBase<FuncOp>> CreateTFOptimizePass();

-struct StandardPipelineOptions : public PassOptions<StandardPipelineOptions> {
+struct StandardPipelineOptions
+    : public PassPipelineOptions<StandardPipelineOptions> {
   Option<bool> enable_inliner{*this, "enable-inliner",
                               llvm::cl::desc("Enable inliner."),
                               llvm::cl::init(false)};
@@ -79,8 +80,7 @@ std::unique_ptr<OpPassBase<FuncOp>> CreateSwitchFoldPass();
 std::unique_ptr<OpPassBase<FuncOp>> CreateTFExecutorIslandCoarseningPass();

 // Create a pass to prune tf_executor.graph from dead nodes.
-std::unique_ptr<OpPassBase<FuncOp>> CreateTFExecutorGraphPruningPass(
-    bool skip_main_func = false);
+std::unique_ptr<OpPassBase<FuncOp>> CreateTFExecutorGraphPruningPass();

 // Prunes unreachable operations of a tf_executor.graph operation.
 void PruneGraph(GraphOp graph);
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/raise_control_flow.cc b/tensorflow/compiler/mlir/tensorflow/transforms/raise_control_flow.cc
index d6acb7488e1..55cb1e2c3df 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/raise_control_flow.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/raise_control_flow.cc
@@ -22,9 +22,9 @@ limitations under the License.
 // eliminating control dependencies, and results in the code being in the
 // canonical TensorFlow dialect.
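The freezability rule used by IsFreezable and FreezeGlobalTensors above reduces to: a non-mutable global tensor whose handle argument is only ever read can have each read replaced by its constant initial value and the handle dropped. A toy C++ sketch of that check, with an invented Use record standing in for MLIR uses:

#include <vector>

struct Use {
  bool is_read_only;  // true when the use is a plain tf.ReadVariableOp
};

// A handle is freezable when the tensor is immutable and every use of the
// handle is a read; then each read can be rewritten to the constant value,
// as RewriteReadOnlyVariableOpToTensorOp does above.
bool IsFreezable(const std::vector<Use>& handle_uses, bool is_mutable) {
  if (is_mutable) return false;
  for (const Use& use : handle_uses)
    if (!use.is_read_only) return false;
  return true;
}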
-#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/control_flow_ops.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" @@ -100,7 +100,7 @@ void RaiseTFControlFlow::rewriteOps() { // aren't necessary any more since the order within a block encodes the // same information. for (auto &operand : op.getOpOperands()) { - if (!operand.get()->getType().isa()) + if (!operand.get().getType().isa()) result.operands.push_back(operand.get()); // Drop all operands from the old operation, eliminating any @@ -110,14 +110,14 @@ void RaiseTFControlFlow::rewriteOps() { // Add a result type for each non-control result we find. bool sawControlResult = false; - for (auto *opResult : op.getResults()) { - if (opResult->getType().isa()) { + for (auto opResult : op.getResults()) { + if (opResult.getType().isa()) { sawControlResult = true; } else { // We assume all control inputs are at the end of the result list. assert(!sawControlResult && "all control results must be last"); (void)sawControlResult; - result.types.push_back(opResult->getType()); + result.types.push_back(opResult.getType()); } } @@ -129,7 +129,7 @@ void RaiseTFControlFlow::rewriteOps() { // We know that all the control results are last, so we can just rewrite // the first results. for (unsigned i = 0, e = result.types.size(); i != e; ++i) - op.getResult(i)->replaceAllUsesWith(replacement->getResult(i)); + op.getResult(i).replaceAllUsesWith(replacement->getResult(i)); } } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/replicate_invariant_op_hoisting.cc b/tensorflow/compiler/mlir/tensorflow/transforms/replicate_invariant_op_hoisting.cc index 36f6f3a933c..7b4ae38726d 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/replicate_invariant_op_hoisting.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/replicate_invariant_op_hoisting.cc @@ -20,11 +20,11 @@ limitations under the License. #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Casting.h" -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Value.h" // TF:local_config_mlir -#include "mlir/IR/Visitors.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Value.h" // TF:llvm-project +#include "mlir/IR/Visitors.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_device.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" @@ -71,19 +71,19 @@ struct ReplicateInvariantOpHoistingPass // } void MakeShapeOpInvariant(tf_device::ReplicateOp replicate_op, int num_replicas, Block* replicate_block, TF::ShapeOp shape_op) { - Value* input = shape_op.input(); + Value input = shape_op.input(); // If ShapeOp operand is replicate tensor block argument, replace with the // associated first replica operand. 
-  if (auto block_arg = llvm::dyn_cast<BlockArgument>(input)) {
-    if (block_arg->getOwner() != replicate_block) return;
+  if (auto block_arg = input.dyn_cast<BlockArgument>()) {
+    if (block_arg.getOwner() != replicate_block) return;

     shape_op.setOperand(
-        replicate_op.getOperand(num_replicas * block_arg->getArgNumber()));
+        replicate_op.getOperand(num_replicas * block_arg.getArgNumber()));
     return;
   }

-  Operation* input_def = input->getDefiningOp();
+  Operation* input_def = input.getDefiningOp();

   // If ShapeOp operand is a ReadVariableOp result where the ReadVariableOp
   // operand is a replicate resource block argument, replace ShapeOp with
@@ -96,13 +96,13 @@ void MakeShapeOpInvariant(tf_device::ReplicateOp replicate_op, int num_replicas,
   // shape has not changed in replicate prior to read. Currently after both
   // ResourceOpLiftingPass and TPURewritePass, there should not be any updates
   // to resources prior to their respective ReadVariableOp.
-  if (auto block_arg = llvm::dyn_cast<BlockArgument>(read_var_op.resource())) {
-    if (block_arg->getOwner() != replicate_block) return;
+  if (auto block_arg = read_var_op.resource().dyn_cast<BlockArgument>()) {
+    if (block_arg.getOwner() != replicate_block) return;

     OpBuilder builder(shape_op);
     auto new_shape_op = builder.create<TF::ShapeOp>(
         shape_op.getLoc(), shape_op.getType(),
-        replicate_op.getOperand(num_replicas * block_arg->getArgNumber()));
+        replicate_op.getOperand(num_replicas * block_arg.getArgNumber()));
     shape_op.replaceAllUsesWith(new_shape_op.getOperation());
     shape_op.erase();
   }
@@ -111,8 +111,8 @@ void MakeShapeOpInvariant(tf_device::ReplicateOp replicate_op, int num_replicas,
 // Checks if op and inner op operands are all replicate invariant.
 bool IsOpReplicateInvariant(Region* replicate_region, Operation* op) {
   auto result = op->walk([&](Operation* inner_op) {
-    for (Value* operand : inner_op->getOperands()) {
-      Region* parent_region = operand->getParentRegion();
+    for (Value operand : inner_op->getOperands()) {
+      Region* parent_region = operand.getParentRegion();
       if (!parent_region || !parent_region->isProperAncestor(replicate_region))
         return WalkResult::interrupt();
     }
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/replicate_to_island.cc b/tensorflow/compiler/mlir/tensorflow/transforms/replicate_to_island.cc
index 9787ac0f0f0..ec0125b913d 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/replicate_to_island.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/replicate_to_island.cc
@@ -24,13 +24,13 @@ limitations under the License.
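The hoisting condition checked by IsOpReplicateInvariant above boils down to: an op, including anything nested inside it, is replicate invariant when none of its operands are defined inside the replicate region. A toy C++ sketch of that predicate, with an invented Op record in place of MLIR operations:

#include <vector>

struct Op {
  // One flag per operand: true when the operand is defined inside the
  // replicate region (i.e. it is a per-replica value).
  std::vector<bool> operand_defined_in_region;
};

// Invariant ops depend only on values from outside the region, so they can
// be hoisted before the replicate without changing any replica's result.
bool IsReplicateInvariant(const Op& op) {
  for (bool in_region : op.operand_defined_in_region)
    if (in_region) return false;
  return true;
}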
#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Casting.h" -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Block.h" // TF:local_config_mlir -#include "mlir/IR/BlockAndValueMapping.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Diagnostics.h" // TF:local_config_mlir -#include "mlir/IR/Dialect.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Block.h" // TF:llvm-project +#include "mlir/IR/BlockAndValueMapping.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Diagnostics.h" // TF:llvm-project +#include "mlir/IR/Dialect.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_device.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h" @@ -60,7 +60,7 @@ llvm::SmallVector ExpandReplicateIntoReplicas( Operation& terminator = replicate_op.GetBody().back(); llvm::SmallVector output_types(terminator.getOperandTypes()); auto control_type = tf_executor::ControlType::get(island_op.getContext()); - llvm::SmallVector replica_inputs(island_op.controlInputs()); + llvm::SmallVector replica_inputs(island_op.controlInputs()); // Replace replicate terminator with YieldOp. builder->setInsertionPoint(&terminator); @@ -83,7 +83,7 @@ llvm::SmallVector ExpandReplicateIntoReplicas( mapping.clear(); for (auto& block_arg : replicate_op.GetBody().getArguments()) mapping.map(block_arg, replicate_op.getOperand( - block_arg->getArgNumber() * num_replicas + i)); + block_arg.getArgNumber() * num_replicas + i)); // Copy over replicate region into replica island. replicate_op.body().cloneInto(&replica.body(), mapping); @@ -149,8 +149,8 @@ void CreateIslandsFromReplicate(const Dialect* tf_dialect, num_replicas); // Collect all replica results. - llvm::SmallVector replicas_outputs(replicate_op.getNumResults(), - nullptr); + llvm::SmallVector replicas_outputs(replicate_op.getNumResults(), + nullptr); for (auto replica_and_idx : llvm::enumerate(replicas)) for (auto replica_result_and_idx : llvm::enumerate(replica_and_idx.value().outputs())) @@ -163,7 +163,7 @@ void CreateIslandsFromReplicate(const Dialect* tf_dialect, // Collect per replica control dependency and add to island operand if replica // island has no uses. - llvm::SmallVector island_operands; + llvm::SmallVector island_operands; for (auto& replica : replicas) if (replica.use_empty()) island_operands.push_back(replica.control()); diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/resource_device_inference.cc b/tensorflow/compiler/mlir/tensorflow/transforms/resource_device_inference.cc index 6dc3e87f8ec..c92ce1f01ad 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/resource_device_inference.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/resource_device_inference.cc @@ -26,16 +26,16 @@ limitations under the License. 
#include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Support/Casting.h" -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir -#include "mlir/IR/Value.h" // TF:local_config_mlir -#include "mlir/IR/Visitors.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassRegistry.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project +#include "mlir/IR/Value.h" // TF:llvm-project +#include "mlir/IR/Visitors.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassRegistry.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" @@ -64,7 +64,7 @@ class PerFunctionResult { // Returns the recorded device assignment for a resource, if any. llvm::Optional DeviceForResource( - const Value* resource) const { + const Value resource) const { llvm::Optional result; if (alias_analysis_.IsUnknownResource(resource)) return result; for (int64_t id : alias_analysis_.GetResourceUniqueIds(resource)) { @@ -87,7 +87,7 @@ class PerFunctionResult { // conflicts with an existing one, returns an error. // // If `changed` is provided, assign *changed to true if anything is modified. - LogicalResult AddResourceDevice(const Value* resource, llvm::StringRef device, + LogicalResult AddResourceDevice(const Value resource, llvm::StringRef device, bool* changed = nullptr) { if (alias_analysis_.IsUnknownResource(resource)) return success(); for (int64_t id : alias_analysis_.GetResourceUniqueIds(resource)) { @@ -108,7 +108,7 @@ class PerFunctionResult { }; // Tries to record device assignment for a resource. -LogicalResult AddResourceDeviceAndEmitError(const Value* resource, +LogicalResult AddResourceDeviceAndEmitError(const Value resource, llvm::StringRef device, Operation* error_reporting_op, PerFunctionResult* result, @@ -127,16 +127,16 @@ LogicalResult ComputeResourceDevicesInComputation(FuncOp func_op, OpBuilder builder(func_op); // Function arguments. for (auto arg : func_op.getArguments()) { - if (!mlir::getElementTypeOrSelf(arg->getType()).isa()) { + if (!mlir::getElementTypeOrSelf(arg.getType()).isa()) { continue; } auto device_attr = func_op.getArgAttrOfType( - arg->getArgNumber(), kFuncDeviceAttr); + arg.getArgNumber(), kFuncDeviceAttr); if (!device_attr || device_attr.getValue() == "") { // If device_attr does not exist, try to construct it from any recorded // assignment. if (auto device = result->DeviceForResource(arg)) { - func_op.setArgAttr(arg->getArgNumber(), kFuncDeviceAttr, + func_op.setArgAttr(arg.getArgNumber(), kFuncDeviceAttr, builder.getStringAttr(*device)); } continue; @@ -160,7 +160,7 @@ LogicalResult ComputeResourceDevicesInComputation(FuncOp func_op, } if (auto identity = llvm::dyn_cast(op)) { // Try to construct IdentityOp's attribute from recorded assignment. 
-      if (!mlir::getElementTypeOrSelf(identity.output()->getType())
+      if (!mlir::getElementTypeOrSelf(identity.output().getType())
               .isa<TF::ResourceType>()) {
         return WalkResult::advance();
       }
@@ -176,7 +176,7 @@ LogicalResult ComputeResourceDevicesInComputation(FuncOp func_op,
     // Propagate and record output device assignment for other ops based on
     // existing recording. E.g., IdentityN.
     for (auto output : op->getResults()) {
-      if (!mlir::getElementTypeOrSelf(output->getType())
+      if (!mlir::getElementTypeOrSelf(output.getType())
               .isa<TF::ResourceType>()) {
         continue;
       }
@@ -212,7 +212,7 @@ void ResourceDeviceInference::runOnModule() {
         for (auto operand_and_argument :
              llvm::zip(caller_operands, callee.getArguments())) {
           if (!mlir::getElementTypeOrSelf(
-                   std::get<0>(operand_and_argument)->getType())
+                   std::get<0>(operand_and_argument).getType())
                    .isa<TF::ResourceType>()) {
             continue;
           }
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc b/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc
index 2f32a3a2c28..70a69a36adf 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc
@@ -19,13 +19,13 @@ limitations under the License.
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Casting.h"
-#include "mlir/IR/BlockAndValueMapping.h"  // TF:local_config_mlir
-#include "mlir/IR/Builders.h"  // TF:local_config_mlir
-#include "mlir/IR/Diagnostics.h"  // TF:local_config_mlir
-#include "mlir/IR/Module.h"  // TF:local_config_mlir
-#include "mlir/IR/StandardTypes.h"  // TF:local_config_mlir
-#include "mlir/Pass/Pass.h"  // TF:local_config_mlir
-#include "mlir/Transforms/RegionUtils.h"  // TF:local_config_mlir
+#include "mlir/IR/BlockAndValueMapping.h"  // TF:llvm-project
+#include "mlir/IR/Builders.h"  // TF:llvm-project
+#include "mlir/IR/Diagnostics.h"  // TF:llvm-project
+#include "mlir/IR/Module.h"  // TF:llvm-project
+#include "mlir/IR/StandardTypes.h"  // TF:llvm-project
+#include "mlir/Pass/Pass.h"  // TF:llvm-project
+#include "mlir/Transforms/RegionUtils.h"  // TF:llvm-project
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_device.h"
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h"
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h"
@@ -87,26 +87,26 @@ void ForwardStoreToLoad(tf_device::LaunchOp launch_op) {
   // resource_handle_to_last_store_op keeps track of the most recent (last)
   // store to each resource. Non-existent entry indicates that a resource has
   // not been stored to yet.
-  llvm::SmallDenseMap<Value*, TF::AssignVariableOp>
+  llvm::SmallDenseMap<Value, TF::AssignVariableOp>
       resource_handle_to_last_store_op;

   // Only iterate through ops directly in launch_op's body as we can't handle
   // ops nested deeper in regions.
   for (Operation& op : llvm::make_early_inc_range(launch_op.GetBody())) {
     if (auto read_variable_op = dyn_cast<TF::ReadVariableOp>(&op)) {
-      Value* resource = read_variable_op.resource();
+      Value resource = read_variable_op.resource();
       auto last_store = resource_handle_to_last_store_op[resource];
       if (!last_store) continue;

       // Use stored value in last_store to replace all uses of current resource
       // load's result, then erase this resource load.
-      read_variable_op.value()->replaceAllUsesWith(last_store.value());
+      read_variable_op.value().replaceAllUsesWith(last_store.value());
       read_variable_op.erase();
       continue;
     }

     if (auto assign_variable_op = dyn_cast<TF::AssignVariableOp>(&op)) {
-      Value* resource = assign_variable_op.resource();
+      Value resource = assign_variable_op.resource();
       auto last_store = resource_handle_to_last_store_op[resource];
       // Previous store ops to same resource can be erased.
       if (last_store) last_store.erase();
@@ -120,17 +120,17 @@ void ForwardStoreToLoad(tf_device::LaunchOp launch_op) {
 // forwarding has been performed on this launch_op such that all loads of same
 // resource are on its initial values.
 void HoistResourceLoads(tf_device::LaunchOp launch_op) {
-  llvm::SmallDenseMap<Value*, TF::ReadVariableOp> resource_to_read_ops;
+  llvm::SmallDenseMap<Value, TF::ReadVariableOp> resource_to_read_ops;

   // Only iterate through ops directly in launch_op's body as we can't handle
   // ops nested deeper in regions.
   for (Operation& op : llvm::make_early_inc_range(launch_op.GetBody())) {
     auto read_variable_op = dyn_cast<TF::ReadVariableOp>(&op);
     if (!read_variable_op) continue;
-    Value* resource = read_variable_op.resource();
+    Value resource = read_variable_op.resource();

     // Skip resources created inside of launch_op.
-    if (resource->getParentRegion() == &launch_op.body()) continue;
+    if (resource.getParentRegion() == &launch_op.body()) continue;

     auto p = resource_to_read_ops.insert({resource, read_variable_op});
     if (p.second) {
@@ -156,18 +156,18 @@ bool AppendResourceStoreValueToReturn(tf_device::LaunchOp launch_op) {
   Block* body = &launch_op.GetBody();
   auto old_return = body->getTerminator();

-  llvm::SmallVector<Value*, 4> new_return_operands(old_return->getOperands());
+  llvm::SmallVector<Value, 4> new_return_operands(old_return->getOperands());

   // Only iterate through ops directly in launch_op's body as we can't handle
   // ops nested deeper in regions.
   for (Operation& op : launch_op.GetBody()) {
     auto assign_variable_op = dyn_cast<TF::AssignVariableOp>(&op);
     if (!assign_variable_op) continue;
-    Value* resource = assign_variable_op.resource();
+    Value resource = assign_variable_op.resource();
     if (!resource) continue;

     // Skip resources created inside of launch_op.
-    if (resource->getParentRegion() == &launch_op.body()) continue;
+    if (resource.getParentRegion() == &launch_op.body()) continue;

     // TODO(ycao): Prevent same value from being returned multiple times.
     // TODO(ycao): Do not return resource store value if it is defined outside
@@ -202,12 +202,12 @@ void SinkResourceStores(tf_device::LaunchOp launch_op, OpBuilder* builder) {
   builder->setInsertionPoint(launch_op);
   auto new_launch_op = builder->create<tf_device::LaunchOp>(
       launch_op.getLoc(), new_launch_return_types,
-      /*operands=*/llvm::SmallVector<Value*, 4>(), launch_op.getAttrs());
+      /*operands=*/llvm::SmallVector<Value, 4>(), launch_op.getAttrs());
   new_launch_op.body().takeBody(launch_op.body());

   // Replace uses of old launch_op results with those of new_launch_op.
   for (auto p : llvm::zip(launch_op.getResults(), new_launch_op.getResults())) {
-    std::get<0>(p)->replaceAllUsesWith(std::get<1>(p));
+    std::get<0>(p).replaceAllUsesWith(std::get<1>(p));
   }

   // Create a mapping from operands of new_return_op operands to new_launch_op
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc
index 39b7fbb4d07..4f69d18a96b 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc
@@ -22,28 +22,33 @@ limitations under the License.
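ForwardStoreToLoad above is classic store-to-load forwarding over resource handles: one linear walk remembers the last value stored to each resource, forwards it to later loads of the same resource, and lets a newer store make the previous one dead. A self-contained C++ model of that walk, with invented Load/Store records and ints standing in for values:

#include <map>
#include <optional>
#include <variant>
#include <vector>

struct Load { int resource; };
struct Store { int resource; int value; };
using Op = std::variant<Load, Store>;

// Returns, for each load in program order, the forwarded value if a prior
// store to the same resource exists (nullopt models a load of the initial
// value, which HoistResourceLoads can then lift out of the body).
std::vector<std::optional<int>> ForwardStores(const std::vector<Op>& body) {
  std::map<int, int> last_store;  // resource -> last stored value
  std::vector<std::optional<int>> load_results;
  for (const Op& op : body) {
    if (const Load* load = std::get_if<Load>(&op)) {
      auto it = last_store.find(load->resource);
      load_results.push_back(it == last_store.end()
                                 ? std::nullopt
                                 : std::optional<int>(it->second));
    } else {
      const Store& store = std::get<Store>(op);
      last_store[store.resource] = store.value;  // shadows the earlier store
    }
  }
  return load_results;
}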
#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FormatVariadic.h" -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Block.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Diagnostics.h" // TF:local_config_mlir -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/SymbolTable.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassRegistry.h" // TF:local_config_mlir -#include "mlir/Support/LLVM.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir -#include "mlir/Transforms/FoldUtils.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Block.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Diagnostics.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/SymbolTable.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassRegistry.h" // TF:llvm-project +#include "mlir/Support/LLVM.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project +#include "mlir/Transforms/FoldUtils.h" // TF:llvm-project +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_device.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" #include "tensorflow/compiler/mlir/tensorflow/translate/export_tf_dialect_op.h" #include "tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.h" +#include "tensorflow/compiler/mlir/tensorflow/utils/convert_type.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/framework/types.pb.h" #define DEBUG_TYPE "tf-shape-inference" @@ -68,29 +73,101 @@ Optional> InferShapeForFunctionReturnType( // Manually fold tf.Cast that precedes the return instruction and only differs // in shape refinement level. for (OpOperand& arg_op : return_op.getOperation()->getOpOperands()) { - Operation* arg_defining_op = arg_op.get()->getDefiningOp(); + Operation* arg_defining_op = arg_op.get().getDefiningOp(); if (auto cast_op = dyn_cast_or_null(arg_defining_op)) { // Shape inference should not change the element type. if (cast_op.SrcT() != cast_op.DstT()) continue; // We only refine the result shape if the result a dynamic shape, the // input has static shape, and the two shapes are compatible. 
-      auto has_static_shape = [](const Value* value) {
-        auto shaped_type = value->getType().dyn_cast<ShapedType>();
+      auto has_static_shape = [](const Value value) {
+        auto shaped_type = value.getType().dyn_cast<ShapedType>();
         return shaped_type && shaped_type.hasStaticShape();
       };
-      Value* input = cast_op.x();
-      Value* result = cast_op.y();
+      Value input = cast_op.x();
+      Value result = cast_op.y();
       if (!has_static_shape(input) || has_static_shape(result) ||
-          failed(verifyCompatibleShape(input->getType(), result->getType())))
+          failed(verifyCompatibleShape(input.getType(), result.getType())))
         continue;

       arg_op.set(cast_op.x());
-      if (cast_op.y()->use_empty()) cast_op.erase();
+      if (cast_op.y().use_empty()) cast_op.erase();
     }
   }
   return llvm::to_vector<4>(return_op.getOperandTypes());
 }
+
+// Returns if the shape inference pass supports an op outside the TF dialect.
+bool IsSupportedNonTFOp(Operation* op) {
+  return isa<tf_executor::YieldOp>(op) || isa<tf_executor::IslandOp>(op) ||
+         isa<tf_executor::FetchOp>(op) || isa<tf_executor::GraphOp>(op) ||
+         isa<ReturnOp>(op) || isa<tf_device::ReturnOp>(op);
+}
+
+// Inserts tf.Cast operation when changing the type of a result if the user is
+// not a TF operation, as we can't guarantee that the new type will be OK.
+void AddCastBackForUnsupportedNonTFUses(Operation* op, Value result,
+                                        Dialect* tf_dialect, Type old_type) {
+  OpBuilder builder(op);
+  builder.setInsertionPointAfter(op);
+  // A tf.Cast operation is lazily created on the first uses that isn't a TF
+  // operation.
+  TF::CastOp cast_op;
+  auto get_cast_op = [&]() {
+    if (!cast_op)
+      cast_op =
+          builder.create<TF::CastOp>(op->getLoc(), old_type, result,
+                                     /*truncate=*/builder.getBoolAttr(false));
+    return cast_op;
+  };
+  for (OpOperand& use : llvm::make_early_inc_range(result->getUses())) {
+    if (use.getOwner()->getDialect() != tf_dialect &&
+        !IsSupportedNonTFOp(use.getOwner()))
+      use.set(get_cast_op());
+  }
+}
+
+// Extracts a PartialTensorShape from the MLIR type.
+Optional<tensorflow::PartialTensorShape> GetShapeFromMlirType(Type t) {
+  if (auto ranked_type = t.dyn_cast<RankedTensorType>()) {
+    // Convert the MLIR shape indices (int64_t) to TensorFlow indices
+    // (int64).
+    ArrayRef<int64_t> shape = ranked_type.getShape();
+    SmallVector<tensorflow::int64, 8> tf_shape(shape.begin(), shape.end());
+    return tensorflow::PartialTensorShape({tf_shape.data(), tf_shape.size()});
+  }
+  return None;
+}
+
+// Passes the operand shapes/types to the op's results.
+bool InferShapeForPassThroughOps(OperandRange pass_through_operands,
+                                 Operation* op, Dialect* tf_dialect) {
+  bool changed = false;
+  for (auto entry : llvm::zip(pass_through_operands, op->getResults())) {
+    Type operand_type = std::get<0>(entry).getType();
+    Value result = std::get<1>(entry);
+    if (result.getType() == operand_type) continue;
+    AddCastBackForUnsupportedNonTFUses(op, result, tf_dialect,
+                                       result.getType());
+    result.setType(operand_type);
+    changed = true;
+  }
+  return changed;
+}
+
+// Infers shape for necessary ops that are not in the TF dialect.
+bool InferShapeForNonTFDialectOperation(Operation* op, Dialect* tf_dialect) {
+  if (auto graph_op = dyn_cast<tf_executor::GraphOp>(op)) {
+    return InferShapeForPassThroughOps(graph_op.GetFetch().fetches(), op,
                                        tf_dialect);
+  }
+  if (auto island_op = dyn_cast<tf_executor::IslandOp>(op)) {
+    return InferShapeForPassThroughOps(island_op.GetYield().fetches(), op,
+                                       tf_dialect);
+  }
+  return false;
+}
+
 }  // namespace

 bool InferShapeForSingleOperation(Operation* op, Dialect* tf_dialect,
@@ -98,9 +175,13 @@ bool InferShapeForSingleOperation(Operation* op, Dialect* tf_dialect,
   assert(tf_dialect == op->getDialect());

   // If no result for this op needs shape inference, we have a fast-path return.
+ // But if the type is a resource, we do not skip it because we might not have + // the handle shapes. if (llvm::all_of(op->getResultTypes(), [](Type type) { auto shape_type = type.dyn_cast(); - return !shape_type || shape_type.hasStaticShape(); + return !shape_type || + (shape_type.hasStaticShape() && + !shape_type.getElementType().isa()); })) { LLVM_DEBUG(llvm::dbgs() << "Skipping inference for statically shaped op '" << op->getName() << "'.\n";); @@ -111,7 +192,7 @@ bool InferShapeForSingleOperation(Operation* op, Dialect* tf_dialect, // This is necessary to avoid reprocessing the tf.Cast that are inserted at // the end of this function. if (isa(op) && - llvm::all_of(op->getResult(0)->getUsers(), [&](Operation* user) { + llvm::all_of(op->getResult(0).getUsers(), [&](Operation* user) { return user->getDialect() != tf_dialect; })) { LLVM_DEBUG(llvm::dbgs() << "Skipping inference for tf.Cast with no TF " @@ -127,10 +208,9 @@ bool InferShapeForSingleOperation(Operation* op, Dialect* tf_dialect, // Get information from the registry and check if we have a shape function for // this op. - const tensorflow::OpRegistrationData* op_reg_data; - if (!tensorflow::OpRegistry::Global() - ->LookUp(node_name.data(), &op_reg_data) - .ok()) { + const tensorflow::OpRegistrationData* op_reg_data = + tensorflow::OpRegistry::Global()->LookUp(node_name.data()); + if (!op_reg_data) { LLVM_DEBUG(llvm::dbgs() << "Skipping inference for unregistered op '" << op->getName() << "'.\n";); return false; @@ -161,8 +241,11 @@ bool InferShapeForSingleOperation(Operation* op, Dialect* tf_dialect, std::vector input_shapes( op->getNumOperands()); std::vector tensors(op->getNumOperands()); + std::vector>>> + handle_shapes_and_types(op->getNumOperands()); for (auto it : llvm::enumerate(op->getOperands())) { - Value* operand = it.value(); + Value operand = it.value(); size_t index = it.index(); // If the operand is constant, then convert it to Tensor. @@ -179,13 +262,32 @@ bool InferShapeForSingleOperation(Operation* op, Dialect* tf_dialect, } } - Type operand_type = operand->getType(); - if (auto ranked_type = operand_type.dyn_cast()) { - // Convert the MLIR shape indices (int64_t) to TensorFlow indices (int64). - ArrayRef shape = ranked_type.getShape(); - SmallVector tf_shape(shape.begin(), shape.end()); - input_shapes[index] = - tensorflow::PartialTensorShape({tf_shape.data(), tf_shape.size()}); + Type operand_type = operand.getType(); + if (auto shape = GetShapeFromMlirType(operand_type)) { + input_shapes[index] = *shape; + } + // Collect the handle shapes and types for a resource. + if (auto resource_type = operand_type.cast() + .getElementType() + .dyn_cast()) { + if (resource_type.getSubtypes().empty()) continue; + auto shapes_and_types = absl::make_unique>>(); + for (auto subtype : resource_type.getSubtypes()) { + auto shape = GetShapeFromMlirType(subtype); + // handle_shapes_and_types requires all shapes to be known. So if any + // subtype is unknown, clear the vector. + if (!shape) { + shapes_and_types = nullptr; + break; + } + tensorflow::DataType dtype; + auto status = + tensorflow::ConvertToDataType(subtype.getElementType(), &dtype); + assert(status.ok() && "Unknown element type"); + shapes_and_types->emplace_back(*shape, dtype); + } + handle_shapes_and_types[index] = std::move(shapes_and_types); } } @@ -194,8 +296,7 @@ bool InferShapeForSingleOperation(Operation* op, Dialect* tf_dialect, // function operates on. 
tensorflow::shape_inference::InferenceContext c( graph_version, *node_def, op_reg_data->op_def, input_shapes, - input_tensors, /*input_tensors_as_shapes=*/{}, - /*input_handle_shapes_and_types=*/{}); + input_tensors, /*input_tensors_as_shapes=*/{}, handle_shapes_and_types); auto status = c.Run(op_reg_data->shape_inference_fn); if (!status.ok()) { LLVM_DEBUG(llvm::dbgs() << "Shape inference error for '" << *op @@ -207,47 +308,52 @@ bool InferShapeForSingleOperation(Operation* op, Dialect* tf_dialect, "inference context matches the MLIR number of results."); // Update the shape for each of the operation result if the InferenceContext - // has more precise shapes recorded. A builder is used to insert tf.Cast - // operation when changing the type of a result is the user is not a TF - // operation, as we can't guarantee that the new type will be OK. + // has more precise shapes recorded. bool changed = false; - OpBuilder builder(op); - builder.setInsertionPointAfter(op); for (int output : llvm::seq(0, c.num_outputs())) { // Skip already statically shaped results. - Value* result = op->getResult(output); - auto shaped_type = result->getType().dyn_cast(); + Value result = op->getResult(output); + auto shaped_type = result.getType().dyn_cast(); if (!shaped_type || shaped_type.hasStaticShape()) continue; tensorflow::shape_inference::ShapeHandle shape_handle = c.output(output); LLVM_DEBUG(llvm::dbgs() << "Inferred output " << output << " : " << c.DebugString(shape_handle) << "\n"); - if (!c.RankKnown(shape_handle)) continue; - - // Convert the shape from TensorFlow (int64) to MLIR (int64_t). - SmallVector shape; - for (int dim : llvm::seq(0, c.Rank(shape_handle))) - shape.push_back(c.Value(c.Dim(shape_handle, dim))); - auto new_type = RankedTensorType::get(shape, shaped_type.getElementType()); - - // A tf.Cast operation is lazily created on the first uses that isn't a TF - // operation. - TF::CastOp cast_op; - auto get_cast_op = [&]() { - if (!cast_op) - cast_op = - builder.create(op->getLoc(), result->getType(), result, - /*truncate=*/builder.getBoolAttr(false)); - return cast_op; + auto get_tensor_type = + [&c](const tensorflow::shape_inference::ShapeHandle& sh, + Type element_type) -> TensorType { + if (!c.RankKnown(sh)) return UnrankedTensorType::get(element_type); + // Convert the shape from TensorFlow (int64) to MLIR (int64_t). + SmallVector shape; + for (int dim : llvm::seq(0, c.Rank(sh))) + shape.push_back(c.Value(c.Dim(sh, dim))); + return RankedTensorType::get(shape, element_type); }; - for (OpOperand& use : llvm::make_early_inc_range(result->getUses())) { - if (use.getOwner()->getDialect() != tf_dialect) use.set(get_cast_op()); + auto new_element_type = shaped_type.getElementType(); + // Populate the handle shapes for a resource. 
+ if (auto resource_type = new_element_type.dyn_cast()) { + auto handle_shapes_types = c.output_handle_shapes_and_types(output); + if (handle_shapes_types) { + llvm::SmallVector subtypes; + OpBuilder b(op); + for (const auto& shape_n_type : *handle_shapes_types) { + Type element_type; + auto status = + tensorflow::ConvertDataType(shape_n_type.dtype, b, &element_type); + assert(status.ok() && "Unknown element type"); + subtypes.push_back(get_tensor_type(shape_n_type.shape, element_type)); + } + new_element_type = TF::ResourceType::get(subtypes, op->getContext()); + } } - - if (result->getType() == new_type) continue; - + auto new_type = get_tensor_type(shape_handle, new_element_type); + if (result.getType() == new_type) continue; + // Inserts a cast back to the original type if any user is not in the TF + // dialect. + AddCastBackForUnsupportedNonTFUses(op, result, tf_dialect, + result.getType()); // Finally we inferred the shape and replace the type for this result. - result->setType(new_type); + result.setType(new_type); changed = true; } if (changed) @@ -285,7 +391,7 @@ LogicalResult RefineShapeForControlFlowFunc(FuncOp func, func.getContext())); for (auto arg_and_idx : llvm::enumerate(func.getArguments())) { - arg_and_idx.value()->setType(input_types[arg_and_idx.index()]); + arg_and_idx.value().setType(input_types[arg_and_idx.index()]); } auto res = @@ -307,8 +413,8 @@ LogicalResult PropagateShapeToIfWhileOpFunctions( int64_t max_iteration) { llvm::SmallVector input_types; input_types.reserve(std::distance(op.input().begin(), op.input().end())); - for (Value* v : op.input()) { - input_types.push_back(v->getType()); + for (Value v : op.input()) { + input_types.push_back(v.getType()); } ModuleOp module = op.template getParentOfType(); @@ -360,7 +466,10 @@ LogicalResult InferShapeUntilFixPoint(Region* region, int64_t graph_version, LLVM_DEBUG(llvm::dbgs() << "Shape inference, iteration " << iteration << "\n"); region->walk([&](Operation* op) { - if (op->getDialect() != tf_dialect) return; + if (op->getDialect() != tf_dialect) { + changed |= InferShapeForNonTFDialectOperation(op, tf_dialect); + return; + } // Before attempting inference, just try to fold the operation. if (succeeded(folder.tryToFold(op))) return; @@ -415,7 +524,7 @@ LogicalResult InferShapeForFunction(FuncOp func, auto new_arg_type = mlir::RankedTensorType::get(shape, element_type); if (new_arg_type != func_type.getInput(i)) { // If the new type is more detailed, trigger shape inference. - func.getArgument(i)->setType(new_arg_type); + func.getArgument(i).setType(new_arg_type); needs_refinement = true; } new_arg_types.push_back(new_arg_type); diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.h b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.h index 0529e6414b7..73993a07292 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.h +++ b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.h @@ -18,10 +18,10 @@ limitations under the License. 
#include -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/Region.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/Region.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project namespace mlir { diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference_pass.cc b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference_pass.cc index d5b86173b69..129efd74f4f 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference_pass.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference_pass.cc @@ -20,15 +20,15 @@ limitations under the License. #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Support/Debug.h" -#include "mlir/IR/Block.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassRegistry.h" // TF:local_config_mlir -#include "mlir/Support/LLVM.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir +#include "mlir/IR/Block.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassRegistry.h" // TF:llvm-project +#include "mlir/Support/LLVM.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" @@ -65,10 +65,9 @@ struct ShapeInference : public ModulePass { } for (auto func : module.getOps()) { InferShapeUntilFixPoint(&func.getBody(), producer.getInt()); - } - - if (auto main_func = module.lookupSymbol("main")) { - InferShapeForFunctionType(main_func); + // TODO(yuanzx): Verify that it is always fine to refine a function's + // return type, as long as we do not change the argument shapes. + InferShapeForFunctionType(func); } } }; diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/sink_constant.cc b/tensorflow/compiler/mlir/tensorflow/transforms/sink_constant.cc index e4358e7e1c7..9d872fb3d1a 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/sink_constant.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/sink_constant.cc @@ -19,11 +19,11 @@ limitations under the License. 
#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassManager.h" // TF:local_config_mlir -#include "mlir/Support/LLVM.h" // TF:local_config_mlir -#include "mlir/Transforms/Passes.h" // TF:local_config_mlir -#include "mlir/Transforms/RegionUtils.h" // TF:local_config_mlir +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassManager.h" // TF:llvm-project +#include "mlir/Support/LLVM.h" // TF:llvm-project +#include "mlir/Transforms/Passes.h" // TF:llvm-project +#include "mlir/Transforms/RegionUtils.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_device.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" @@ -48,12 +48,11 @@ class ExecutorConstantSinking // The sunk_constant map keeps a mapping from a ConstOp defined above to // a sunk clone of it. This allows for reusing a sunk constant with // multiple uses in the region. - llvm::DenseMap sunk_constant; + llvm::DenseMap sunk_constant; Region &body = launch.body(); visitUsedValuesDefinedAbove(body, [&](OpOperand *use) { - Value *constant = use->get(); - auto const_op = - dyn_cast_or_null(constant->getDefiningOp()); + Value constant = use->get(); + auto const_op = dyn_cast_or_null(constant.getDefiningOp()); if (!const_op) return; // We found a constant, try to insert it in the map and re-use its @@ -62,13 +61,13 @@ class ExecutorConstantSinking if (!map_entry.second) { // This constant has already been cloned into the region, reuse it. use->set(map_entry.first->getSecond().getResult()); - LLVM_DEBUG(llvm::dbgs() << "Re-use sunk constant " << *use->get() - << "\n in " << *use->get() << "\n"); - if (constant->use_empty()) const_op.erase(); + LLVM_DEBUG(llvm::dbgs() << "Re-use sunk constant " << use->get() + << "\n in " << use->get() << "\n"); + if (constant.use_empty()) const_op.erase(); return; } - if (constant->hasOneUse()) { - LLVM_DEBUG(llvm::dbgs() << "Moved constant " << *constant << "\n"); + if (constant.hasOneUse()) { + LLVM_DEBUG(llvm::dbgs() << "Moved constant " << constant << "\n"); const_op.getOperation()->moveBefore(&body.begin()->front()); return; } @@ -76,8 +75,8 @@ class ExecutorConstantSinking body.begin()->getOperations().insert(body.begin()->begin(), map_entry.first->getSecond()); use->set(map_entry.first->getSecond().getResult()); - LLVM_DEBUG(llvm::dbgs() << "Sunk cloned constant " << *use->get() - << "\n in " << *use->get() << "\n"); + LLVM_DEBUG(llvm::dbgs() << "Sunk cloned constant " << use->get() + << "\n in " << use->get() << "\n"); }); }); } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/test_side_effect_analysis.cc b/tensorflow/compiler/mlir/tensorflow/transforms/test_side_effect_analysis.cc index f0b7964389d..eb754cc3bbd 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/test_side_effect_analysis.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/test_side_effect_analysis.cc @@ -22,11 +22,11 @@ limitations under the License. 
#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Debug.h" -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassManager.h" // TF:local_config_mlir -#include "mlir/Support/LLVM.h" // TF:local_config_mlir -#include "mlir/Transforms/Passes.h" // TF:local_config_mlir -#include "mlir/Transforms/RegionUtils.h" // TF:local_config_mlir +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassManager.h" // TF:llvm-project +#include "mlir/Support/LLVM.h" // TF:llvm-project +#include "mlir/Transforms/Passes.h" // TF:llvm-project +#include "mlir/Transforms/RegionUtils.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "tensorflow/compiler/mlir/tensorflow/utils/error_util.h" diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tf_graph_optimization_pass.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tf_graph_optimization_pass.cc index 2eb12c80efe..5606428bb19 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tf_graph_optimization_pass.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tf_graph_optimization_pass.cc @@ -16,10 +16,10 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/transforms/tf_graph_optimization_pass.h" #include "llvm/Support/CommandLine.h" -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Identifier.h" // TF:local_config_mlir -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Identifier.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.h" #include "tensorflow/compiler/mlir/tensorflow/translate/import_model.h" #include "tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_flags.h" diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tf_graph_optimization_pass.h b/tensorflow/compiler/mlir/tensorflow/transforms/tf_graph_optimization_pass.h index 8b97bd606a9..49d92bf3151 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tf_graph_optimization_pass.h +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tf_graph_optimization_pass.h @@ -16,7 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSFORMS_TF_GRAPH_OPTIMIZATION_PASS_H_ #define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSFORMS_TF_GRAPH_OPTIMIZATION_PASS_H_ -#include "mlir/Pass/Pass.h" // TF:local_config_mlir +#include "mlir/Pass/Pass.h" // TF:llvm-project #include "tensorflow/core/common_runtime/optimization_registry.h" namespace tensorflow { diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_cluster_formation.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_cluster_formation.cc index 7a840aa0d12..98833a7de40 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_cluster_formation.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_cluster_formation.cc @@ -35,17 +35,17 @@ limitations under the License. 
#include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Support/Casting.h" -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Identifier.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir -#include "mlir/IR/Value.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassRegistry.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir -#include "mlir/Transforms/RegionUtils.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Identifier.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project +#include "mlir/IR/Value.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassRegistry.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project +#include "mlir/Transforms/RegionUtils.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_device.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" @@ -140,8 +140,8 @@ bool ShouldMoveOpAfterCluster( const llvm::SmallSetVector& cluster_ops, const llvm::SmallSetVector& preceding_users) { auto result = op->walk([&](Operation* op) { - for (Value* operand : op->getOperands()) { - Operation* def = operand->getDefiningOp(); + for (Value operand : op->getOperands()) { + Operation* def = operand.getDefiningOp(); // Operands may not have a defining op (BlockArgument) or is from a // different block. if (!def || def->getBlock() != block) continue; @@ -179,13 +179,13 @@ llvm::SmallSetVector CollectClusterPrecedingUsers( // `tf_device::LaunchOp` and associated terminator. Results that have no uses // outside of the cluster (i.e. results of ops in the cluster are only consumed // by other ops in the cluster) are pruned. -llvm::SmallVector CollectClusterResults( +llvm::SmallVector CollectClusterResults( Block* block, const llvm::SmallSetVector& cluster_ops) { - llvm::SmallVector results; + llvm::SmallVector results; for (Operation* op : cluster_ops) { - for (Value* result : op->getResults()) { - for (Operation* user : result->getUsers()) { + for (Value result : op->getResults()) { + for (Operation* user : result.getUsers()) { // Check if user is not an op in the cluster. if (cluster_ops.count(block->findAncestorOpInBlock(*user)) == 0) { results.push_back(result); @@ -200,13 +200,13 @@ llvm::SmallVector CollectClusterResults( // Creates a `tf_device::LaunchOp` to wrap cluster ops. tf_device::LaunchOp CreateLaunchOpForCluster(Operation* last_cluster_op, - llvm::ArrayRef results) { + llvm::ArrayRef results) { // `tf_device::LaunchOp` will be placed at where the last op of the cluster // is. OpBuilder builder(last_cluster_op); llvm::SmallVector result_types; - for (Value* result : results) result_types.push_back(result->getType()); + for (Value result : results) result_types.push_back(result.getType()); // An empty string placeholder is used for the device as that will be later // populated with the device of the associated TPUReplicateMetadata op. 
@@ -241,12 +241,12 @@ void MoveClusterOpsToLaunchOp( // Replaces uses of cluster ops results outside of cluster with the associated // `tf_device::LaunchOp` results. void UpdateLaunchOpResultExternalUses(tf_device::LaunchOp launch_op, - llvm::ArrayRef results) { + llvm::ArrayRef results) { Block& launch_op_block = launch_op.GetBody(); for (auto ret_vals : llvm::zip(results, launch_op.getResults())) { - Value* old_ret = std::get<0>(ret_vals); - Value* new_ret = std::get<1>(ret_vals); - for (auto& use : old_ret->getUses()) + Value old_ret = std::get<0>(ret_vals); + Value new_ret = std::get<1>(ret_vals); + for (auto& use : old_ret.getUses()) if (!launch_op_block.findAncestorOpInBlock(*use.getOwner())) use.set(new_ret); } @@ -307,7 +307,7 @@ LogicalResult ReplicateCluster(tf_device::LaunchOp launch_op, llvm::SmallSetVector unique_replicated_input_ops; mlir::visitUsedValuesDefinedAbove( launch_op.body(), launch_op.body(), [&](mlir::OpOperand* operand) { - Operation* def = operand->get()->getDefiningOp(); + Operation* def = operand->get().getDefiningOp(); if (def && llvm::isa(def)) unique_replicated_input_ops.insert(def); }); @@ -337,9 +337,9 @@ LogicalResult ReplicateCluster(tf_device::LaunchOp launch_op, // Replace replicated cluster results with replicate op results. for (auto result_and_idx : llvm::enumerate(launch_op.getResults())) { - Value* result = result_and_idx.value(); + Value result = result_and_idx.value(); int idx = result_and_idx.index(); - for (auto& use : result->getUses()) { + for (auto& use : result.getUses()) { Operation* def = use.getOwner(); if (!def || !llvm::isa(def)) return launch_op.emitError() @@ -360,7 +360,7 @@ LogicalResult ReplicateCluster(tf_device::LaunchOp launch_op, for (auto input_and_block_arg : llvm::zip(replicated_input_ops, replicate_op.GetBody().getArguments())) { Operation* input = std::get<0>(input_and_block_arg); - Value* block_arg = std::get<1>(input_and_block_arg); + Value block_arg = std::get<1>(input_and_block_arg); mlir::replaceAllUsesInRegionWith(input->getResult(0), block_arg, launch_op.body()); } @@ -412,7 +412,7 @@ LogicalResult FormClustersInBlock(Block* block, llvm::SmallSetVector preceding_users = CollectClusterPrecedingUsers(block, cluster_ops); - llvm::SmallVector results = + llvm::SmallVector results = CollectClusterResults(block, cluster_ops); tf_device::LaunchOp launch_op = @@ -470,7 +470,7 @@ void TPUClusterFormation::runOnFunction() { // `tf_device.replicate` is created and replicated (1) operands/results are // untouched. if (op->getNumOperands() == 1 && op->getNumResults() == 1) - op->getResult(0)->replaceAllUsesWith(op->getOperand(0)); + op->getResult(0).replaceAllUsesWith(op->getOperand(0)); // Leftover TPUReplicatedInput/TPUReplicatedOutput that are not of // `num_replicas` to 1. diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_dynamic_padding_mapper.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_dynamic_padding_mapper.cc index f2f885dbcc8..38a01e168f7 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_dynamic_padding_mapper.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_dynamic_padding_mapper.cc @@ -24,15 +24,15 @@ limitations under the License. 
#include "llvm/ADT/StringRef.h" #include "llvm/Support/Casting.h" #include "llvm/Support/FormatVariadic.h" -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Block.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/IR/Value.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassRegistry.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Block.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/IR/Value.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassRegistry.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_device.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" #include "tensorflow/core/protobuf/tpu/dynamic_padding.pb.h" @@ -60,9 +60,9 @@ llvm::SmallDenseMap GetRemappedReplicatedInputIndices( llvm::SmallDenseMap remapped_indices; for (auto operand_and_idx : llvm::enumerate(launch_func.getOperands())) - if (auto block_arg = llvm::dyn_cast(operand_and_idx.value())) - if (block_arg->getOwner() == replicate_block) - remapped_indices[block_arg->getArgNumber()] = operand_and_idx.index(); + if (auto block_arg = operand_and_idx.value().dyn_cast()) + if (block_arg.getOwner() == replicate_block) + remapped_indices[block_arg.getArgNumber()] = operand_and_idx.index(); return remapped_indices; } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_merge_variables_with_execute.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_merge_variables_with_execute.cc index 28332503adc..dddf916089b 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_merge_variables_with_execute.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_merge_variables_with_execute.cc @@ -28,18 +28,18 @@ limitations under the License. 
#include "llvm/ADT/iterator_range.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/Identifier.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir -#include "mlir/IR/Value.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassRegistry.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir -#include "mlir/Transforms/RegionUtils.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/Identifier.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project +#include "mlir/IR/Value.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassRegistry.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project +#include "mlir/Transforms/RegionUtils.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_device.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" @@ -92,15 +92,15 @@ struct VariableAccessInfo { // Information about all resource accesses to be fused into a TPUExecute op. struct VariableAccessesForTPUExecute { // Maps each resource detected to VariableAccessInfo. - llvm::SmallDenseMap per_resource_info; + llvm::SmallDenseMap per_resource_info; // The corresponding new output index in TPUExecuteAndUpdateVariables for // each old output index in TPUExecute. llvm::SmallVector old_to_new_output_mapping; // The resources read by ReadVariableOps that are inputs to TPUExecute. // Ordered by the input indices to TPUExecute - llvm::SmallVector resources_read; + llvm::SmallVector resources_read; // Operands for the new TPUExecuteAndUpdateVariables. - llvm::SmallVector new_operand_values; + llvm::SmallVector new_operand_values; }; // Returns if an op accesses a resource. @@ -135,23 +135,23 @@ VariableAccessesForTPUExecute BuildVariableAccessInfo(Operation* execute, // Find inputs that are variable reads. for (auto operand : llvm::enumerate(execute->getOpOperands())) { infos.new_operand_values.push_back(operand.value().get()); - if (!operand.value().get()->getDefiningOp()) continue; + if (!operand.value().get().getDefiningOp()) continue; auto read_op = llvm::dyn_cast( - operand.value().get()->getDefiningOp()); + operand.value().get().getDefiningOp()); if (!read_op) continue; auto resource = read_op.resource(); if (check_device) { - if (auto resource_op = resource->getDefiningOp()) { + if (auto resource_op = resource.getDefiningOp()) { auto resource_attr = resource_op->getAttr(kDeviceAttr); // Check device matching for the node defining the resource. if (!resource_attr || resource_attr != device_attr) continue; } else { - auto resource_arg = llvm::dyn_cast(resource); + auto resource_arg = resource.dyn_cast(); assert(resource_arg); // Check device matching for the argument defining the resource. 
auto resource_attr = func.getArgAttrOfType( - resource_arg->getArgNumber(), kFuncDeviceAttr); + resource_arg.getArgNumber(), kFuncDeviceAttr); if (!resource_attr || resource_attr != device_attr) continue; } } @@ -206,7 +206,7 @@ VariableAccessesForTPUExecute BuildVariableAccessInfo(Operation* execute, } infos.resources_read.erase( llvm::remove_if(infos.resources_read, - [&](const Value* resource) { + [&](const Value resource) { return infos.per_resource_info.count(resource) == 0; }), infos.resources_read.end()); @@ -222,9 +222,8 @@ VariableAccessesForTPUExecute BuildVariableAccessInfo(Operation* execute, llvm::SmallVector output_fused(execute->getNumResults(), false); for (int i = 0; i < execute->getNumResults(); ++i) { auto result = execute->getResult(i); - if (!result->hasOneUse()) continue; - auto assign_op = - llvm::dyn_cast(*result->user_begin()); + if (!result.hasOneUse()) continue; + auto assign_op = llvm::dyn_cast(*result.user_begin()); if (!assign_op) continue; auto resource = assign_op.resource(); auto it = infos.per_resource_info.find(resource); @@ -330,7 +329,7 @@ void MergeForOneTPUExecute(Operation* execute, bool check_device, // Replace the uses. for (int i = 0; i < infos.old_to_new_output_mapping.size(); ++i) { if (infos.old_to_new_output_mapping[i] < 0) continue; - execute->getResult(i)->replaceAllUsesWith( + execute->getResult(i).replaceAllUsesWith( merged_execute.getResult(infos.old_to_new_output_mapping[i])); } // Remove the assign ops. diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_rewrite_pass.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_rewrite_pass.cc index 1033670dd1c..355c0afa40b 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_rewrite_pass.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_rewrite_pass.cc @@ -25,15 +25,15 @@ limitations under the License. #include "llvm/Support/CommandLine.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassRegistry.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassRegistry.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_device.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" @@ -277,7 +277,7 @@ Operation* BuildCompileOp(tf_device::LaunchFuncOp launch_func, int num_replicas, // TODO(b/139377366): When shape inference is ready, we can use compile time // shape inference to get inputs that have static shapes and only use shape // ops for the rest. 
- llvm::SmallVector compile_op_operands; + llvm::SmallVector compile_op_operands; compile_op_operands.reserve(launch_func.getNumOperands()); for (auto operand_and_idx : llvm::enumerate(launch_func.getOperands())) { @@ -332,7 +332,7 @@ Operation* BuildExecuteOp(Operation* compile_op, OpBuilder* builder) { // TPUExecute inherits all launch_func inputs, and takes an additional input // for compilation cache key. - llvm::SmallVector tensor_inputs(launch_func.getOperands()); + llvm::SmallVector tensor_inputs(launch_func.getOperands()); tensor_inputs.push_back(compile_op->getResult(1)); // TODO(b/139377366): Need to snapshot all resource variable inputs in @@ -457,7 +457,7 @@ LogicalResult Rewrite( // the other ops that are intended to consume the compile result. Block* block = launch_func.getOperation()->getBlock(); for (auto compile_result_op : block->getOps()) - compile_result_op.output()->replaceAllUsesWith(compile_op->getResult(0)); + compile_result_op.output().replaceAllUsesWith(compile_op->getResult(0)); BuildTPUCompileSucceededAssertOp(compile_op, builder); diff --git a/tensorflow/compiler/mlir/tensorflow/translate/breakup-islands.cc b/tensorflow/compiler/mlir/tensorflow/translate/breakup-islands.cc index 764c7915577..98a043219db 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/breakup-islands.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/breakup-islands.cc @@ -19,12 +19,12 @@ limitations under the License. #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassRegistry.h" // TF:local_config_mlir -#include "mlir/Support/STLExtras.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassRegistry.h" // TF:llvm-project +#include "mlir/Support/STLExtras.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h" @@ -44,7 +44,7 @@ struct BreakUpIslands : OperationPass { void BreakUpIsland(tf_executor::IslandOp op, const TF::SideEffectAnalysis& side_effect_analysis, - llvm::DenseMap>* + llvm::DenseMap>* new_control_edges); }; @@ -64,7 +64,7 @@ void BreakUpIslands::runOnOperation() { // Map from the users of the existing islands to the list of control // edges that need to be added. - llvm::DenseMap> new_control_edges; + llvm::DenseMap> new_control_edges; auto& side_effect_analysis = getAnalysis(); // Iterate in reverse order to avoid invalidating Operation* stored in // new_control_edges. @@ -78,7 +78,7 @@ void BreakUpIslands::runOnOperation() { // Apply edge additions in reverse order so that the ops don't get // invalidated. - llvm::SmallVector edges; + llvm::SmallVector edges; llvm::SmallPtrSet dups; llvm::SmallVector types; for (auto& item : @@ -96,12 +96,12 @@ void BreakUpIslands::runOnOperation() { edges.assign(item.operand_begin(), item.operand_end()); dups.clear(); - for (Value* input : edges) { - dups.insert(input->getDefiningOp()); + for (Value input : edges) { + dups.insert(input.getDefiningOp()); } // Insert new control edges removing duplicates. 
- for (Value* value : llvm::reverse(edge.second)) { - if (dups.insert(value->getDefiningOp()).second) edges.push_back(value); + for (Value value : llvm::reverse(edge.second)) { + if (dups.insert(value.getDefiningOp()).second) edges.push_back(value); } state.addOperands(edges); Operation* new_op = builder.createOperation(state); @@ -114,7 +114,7 @@ void BreakUpIslands::runOnOperation() { // Helper that creates an island. If `sub_op` is not nullptr, it will be moved // to the island. tf_executor::IslandOp CreateIsland(ArrayRef result_types, - ArrayRef control_inputs, + ArrayRef control_inputs, const tf_executor::ControlType& control_type, const Location& loc, Operation* sub_op, tf_executor::IslandOp original_island) { @@ -132,7 +132,7 @@ tf_executor::IslandOp CreateIsland(ArrayRef result_types, if (sub_op) { island_builder.create(loc, sub_op->getResults()); } else { - island_builder.create(loc, ArrayRef{}); + island_builder.create(loc, ArrayRef{}); } return island; } @@ -160,7 +160,7 @@ IslandSourcesAndSinks FindSourcesAndSinksInIsland( for (auto predecessor : predecessors) result.sinks.erase(predecessor); bool has_in_island_operands = false; for (auto operand : sub_op.getOperands()) { - auto defining_op = operand->getDefiningOp(); + auto defining_op = operand.getDefiningOp(); if (!defining_op || defining_op->getParentOp() != island) continue; // Remove operands from sinks. result.sinks.erase(defining_op); @@ -178,7 +178,7 @@ IslandSourcesAndSinks FindSourcesAndSinksInIsland( void BreakUpIslands::BreakUpIsland( tf_executor::IslandOp op, const TF::SideEffectAnalysis& side_effect_analysis, - llvm::DenseMap>* + llvm::DenseMap>* new_control_edges) { auto island_body = op.GetBody().without_terminator(); // Skip islands that are already only a single op. @@ -188,18 +188,18 @@ void BreakUpIslands::BreakUpIsland( auto island_control_inputs = llvm::to_vector<4>(op.controlInputs()); // Add control dependencies for yields of values defined by other islands to // the island that defines that fetched value. - for (auto* fetch : op.GetYield().fetches()) { + for (auto fetch : op.GetYield().fetches()) { // Ok, because there is no op to add control to (eg: function args). - if (!fetch->getDefiningOp()) continue; - if (fetch->getDefiningOp()->getParentOp() == op) { + if (!fetch.getDefiningOp()) continue; + if (fetch.getDefiningOp()->getParentOp() == op) { // OK, because it is the same island. } else if (auto island_op = llvm::dyn_cast( - fetch->getDefiningOp())) { + fetch.getDefiningOp())) { island_control_inputs.push_back(island_op.control()); } else { // TODO(parkers): Any defining op that has a control output can be handled // just like an island. - fetch->getDefiningOp()->emitError("Fetching non-island as dependency."); + fetch.getDefiningOp()->emitError("Fetching non-island as dependency."); return signalPassFailure(); } } @@ -214,9 +214,9 @@ void BreakUpIslands::BreakUpIsland( auto sources_and_sinks = FindSourcesAndSinksInIsland(op, side_effect_analysis); // The corresponding control output of the new island created for each sub-op. - llvm::SmallDenseMap new_control_for_sub_ops; + llvm::SmallDenseMap new_control_for_sub_ops; // Control outputs of newly created islands that are sinks. - llvm::SmallVector sink_island_controls; + llvm::SmallVector sink_island_controls; // For each operation in the island, construct a new island to wrap the op, // yield all the results, and replace all the usages with the results of the // new island. 
@@ -224,7 +224,7 @@ void BreakUpIslands::BreakUpIsland( const auto predecessors = side_effect_analysis.DirectControlPredecessors(&sub_op); // Get the controls from the predecessors. - llvm::SmallVector predecessors_control; + llvm::SmallVector predecessors_control; predecessors_control.reserve(predecessors.size()); for (auto predecessor : predecessors) { predecessors_control.push_back(new_control_for_sub_ops[predecessor]); @@ -233,9 +233,9 @@ void BreakUpIslands::BreakUpIsland( // by inter-islands dependencies; otherwise, we do not need to include // island_control_inputs, since they must have been tracked by the (direct // or indirect) control predecessors or operands. - ArrayRef control = sources_and_sinks.sources.count(&sub_op) > 0 - ? island_control_inputs - : predecessors_control; + ArrayRef control = sources_and_sinks.sources.count(&sub_op) > 0 + ? island_control_inputs + : predecessors_control; auto island = CreateIsland(llvm::to_vector<4>(sub_op.getResultTypes()), control, control_type, sub_op.getLoc(), &sub_op, op); @@ -255,11 +255,11 @@ void BreakUpIslands::BreakUpIsland( sink_island_controls.push_back(island.control()); } assert(sink_island_controls.size() == 1); - op.control()->replaceAllUsesWith(sink_island_controls[0]); + op.control().replaceAllUsesWith(sink_island_controls[0]); // All existing outputs need to add a control flow edge from // sink_island_controls[0]. - for (Value* out : op.outputs()) { - for (auto& use : out->getUses()) { + for (Value out : op.outputs()) { + for (auto& use : out.getUses()) { Operation* owner = use.getOwner(); if (auto island_op = llvm::dyn_cast(owner->getParentOp())) { @@ -275,7 +275,7 @@ void BreakUpIslands::BreakUpIsland( } } for (auto item : llvm::zip(op.outputs(), op.GetYield().fetches())) - std::get<0>(item)->replaceAllUsesWith(std::get<1>(item)); + std::get<0>(item).replaceAllUsesWith(std::get<1>(item)); op.erase(); } diff --git a/tensorflow/compiler/mlir/tensorflow/translate/control_to_executor_dialect.cc b/tensorflow/compiler/mlir/tensorflow/translate/control_to_executor_dialect.cc index 29979c02116..696891289ca 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/control_to_executor_dialect.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/control_to_executor_dialect.cc @@ -22,13 +22,13 @@ limitations under the License. 
#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Sequence.h" #include "llvm/Support/Debug.h" -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/Value.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassRegistry.h" // TF:local_config_mlir -#include "mlir/Support/LLVM.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/Value.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassRegistry.h" // TF:llvm-project +#include "mlir/Support/LLVM.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/control_flow_ops.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" @@ -68,9 +68,9 @@ static bool HasOnlyTFControlOperations(FuncOp function) { tf_executor::IslandOp ControlToExecutorDialectConversion::CreateIslandForOp( Operation *op, OpBuilder *builder) { // Create a new region for the tf_executor.island body - SmallVector operands; - for (Value *operand : op->getOperands()) - if (operand->getType().isa()) + SmallVector operands; + for (Value operand : op->getOperands()) + if (operand.getType().isa()) operands.push_back(operand); SmallVector types; for (Type result_type : op->getResultTypes()) @@ -118,8 +118,8 @@ void ControlToExecutorDialectConversion::runOnFunction() { // This is the return of the function, we will create a fetch in the graph // matching the operands of the returns. The return is then updated to // take as operands the results of the tf_executor.graph operation. - SmallVector ret_vals; - for (Value *operand : op.getOperands()) ret_vals.push_back(operand); + SmallVector ret_vals; + for (Value operand : op.getOperands()) ret_vals.push_back(operand); for (auto &graph_result : llvm::enumerate(graph_op.getResults())) op.setOperand(graph_result.index(), graph_result.value()); builder.create(getFunction().getLoc(), ret_vals); @@ -128,7 +128,7 @@ void ControlToExecutorDialectConversion::runOnFunction() { assert(IsUnderscoredTFOp(&op) && "Expected only _tf operations"); // The operands and types arrays are used to create the tf_executor ops. - SmallVector operands; + SmallVector operands; operands.append(op.getOperands().begin(), op.getOperands().end()); SmallVector types; for (Type result_type : op.getResultTypes()) { @@ -155,7 +155,7 @@ void ControlToExecutorDialectConversion::runOnFunction() { loc, types, operands, ArrayRef{}); } else if (op.getName().getStringRef() == "_tf.NextIteration.source") { replacement = builder.create( - loc, op.getResult(0)->getType()); + loc, op.getResult(0).getType()); // Record a mapping of the name to the nextiteration.source so that when // we convert the sink we can get the token. StringAttr frame = op.getAttrOfType("name"); @@ -164,9 +164,9 @@ void ControlToExecutorDialectConversion::runOnFunction() { cast(replacement); // Replace the results here since the _tf source does not produce a token // there isn't a mapping for the new result #1. 
- op.getResult(0)->replaceAllUsesWith(replacement->getResult(0)); + op.getResult(0).replaceAllUsesWith(replacement->getResult(0)); for (int i : llvm::seq(1, op.getNumResults())) - op.getResult(i)->replaceAllUsesWith(replacement->getResult(i + 1)); + op.getResult(i).replaceAllUsesWith(replacement->getResult(i + 1)); replacement->setAttrs(op.getAttrList()); op.erase(); continue; @@ -201,8 +201,8 @@ void ControlToExecutorDialectConversion::runOnFunction() { // Only the non-control operands are carried over, the island is handling // the control input. - for (Value *operand : op.getOperands()) - if (!operand->getType().isa()) + for (Value operand : op.getOperands()) + if (!operand.getType().isa()) result.operands.push_back(operand); // Add a result type for each non-control result we find @@ -223,7 +223,7 @@ void ControlToExecutorDialectConversion::runOnFunction() { inner_op->setAttrs(op.getAttrList()); // Add the terminator for the island - SmallVector ret_vals(inner_op->getResults()); + SmallVector ret_vals(inner_op->getResults()); island_builder.create(loc, ret_vals); } @@ -232,7 +232,7 @@ void ControlToExecutorDialectConversion::runOnFunction() { if (!isa(replacement)) replacement->setAttrs(op.getAttrList()); for (int i : llvm::seq(0, op.getNumResults())) - op.getResult(i)->replaceAllUsesWith(replacement->getResult(i)); + op.getResult(i).replaceAllUsesWith(replacement->getResult(i)); op.erase(); } } diff --git a/tensorflow/compiler/mlir/tensorflow/translate/derived_attr_populator_gen.cc b/tensorflow/compiler/mlir/tensorflow/translate/derived_attr_populator_gen.cc index 222463e1d29..be146ab63a0 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/derived_attr_populator_gen.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/derived_attr_populator_gen.cc @@ -23,7 +23,7 @@ limitations under the License. #include "llvm/TableGen/Main.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/TableGenBackend.h" -#include "mlir/TableGen/Operator.h" // TF:local_config_mlir +#include "mlir/TableGen/Operator.h" // TF:llvm-project using llvm::LessRecord; using llvm::raw_ostream; diff --git a/tensorflow/compiler/mlir/tensorflow/translate/executor_to_control_dialect.cc b/tensorflow/compiler/mlir/tensorflow/translate/executor_to_control_dialect.cc index 8a4f8aacc0d..96a7fcbb5ba 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/executor_to_control_dialect.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/executor_to_control_dialect.cc @@ -21,13 +21,13 @@ limitations under the License. 
#include "llvm/ADT/SmallString.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/Value.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassRegistry.h" // TF:local_config_mlir -#include "mlir/Support/LLVM.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/Value.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassRegistry.h" // TF:llvm-project +#include "mlir/Support/LLVM.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/control_flow_ops.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" @@ -42,54 +42,6 @@ struct ExecutorToControlDialectConversion : public FunctionPass { void runOnFunction() override; }; - -// Replace all uses of value `v` with a list of new values. Because number of -// new values might be greater than 1, users of `v` might be replaced with their -// clones in case of non-resizable operands list. -void ReplaceAllUsesOfValueWithValues(Value *v, - Operation::operand_range new_values) { - int new_values_size = std::distance(new_values.begin(), new_values.end()); - if (new_values_size == 1) { - v->replaceAllUsesWith(*new_values.begin()); - return; - } - - OpBuilder builder(v->getContext()); - for (Operation *user : llvm::make_early_inc_range(v->getUsers())) { - builder.setInsertionPoint(user); - - llvm::SmallVector new_operands; - new_operands.reserve(user->getNumOperands() - 1 + new_values_size); - for (Value *operand : user->getOperands()) { - if (operand == v) { - new_operands.append(new_values.begin(), new_values.end()); - } else { - new_operands.push_back(operand); - } - } - - if (user->hasResizableOperandsList()) { - user->setOperands(new_operands); - continue; - } - - OperationState state(user->getLoc(), user->getName().getStringRef()); - state.addOperands(new_operands); - - llvm::SmallVector result_types(user->getResultTypes()); - state.addTypes(result_types); - - state.addAttributes(user->getAttrs()); - for (auto &old_region : user->getRegions()) { - Region *r = state.addRegion(); - r->takeBody(old_region); - } - Operation *replacement = builder.createOperation(state); - user->replaceAllUsesWith(replacement); - user->erase(); - } -} - } // end anonymous namespace static bool HasSingleGraph(FuncOp function) { @@ -127,7 +79,7 @@ void ExecutorToControlDialectConversion::runOnFunction() { for (auto ops_and_ret_vals : llvm::zip(graph.getResults(), fetch.getOperands())) std::get<0>(ops_and_ret_vals) - ->replaceAllUsesWith(std::get<1>(ops_and_ret_vals)); + .replaceAllUsesWith(std::get<1>(ops_and_ret_vals)); op.erase(); continue; } @@ -135,7 +87,18 @@ void ExecutorToControlDialectConversion::runOnFunction() { builder.setInsertionPoint(&op); if (auto island = dyn_cast(op)) { - Value *ctl_sequence = nullptr; + Value ctl_sequence = nullptr; + if (island.GetBody().without_terminator().empty() && + island.getNumOperands() > 1) { + // For an empty island with multiple control inputs, we create a no-op + // inside it which will group all the inputs into one control output. 
+ // This helps reducing the number of edges when there are multiple + // islands depending on this one. + builder.setInsertionPointToStart(&island.GetBody()); + builder.create(op.getLoc(), ArrayRef{}, + ArrayRef{}, ArrayRef{}); + builder.setInsertionPoint(&op); + } for (Operation &wrapped_op : island.GetBody()) { LLVM_DEBUG(llvm::dbgs() << " In island: " << wrapped_op.getName() << "\n"); @@ -143,7 +106,7 @@ void ExecutorToControlDialectConversion::runOnFunction() { for (auto ops_and_ret_vals : llvm::zip(island.getResults(), wrapped_op.getOperands())) std::get<0>(ops_and_ret_vals) - ->replaceAllUsesWith(std::get<1>(ops_and_ret_vals)); + .replaceAllUsesWith(std::get<1>(ops_and_ret_vals)); break; } // Add a leading _ off the name. @@ -162,7 +125,7 @@ void ExecutorToControlDialectConversion::runOnFunction() { if (ctl_sequence) { state.operands.push_back(ctl_sequence); } else { - for (Value *ctl_operand : island.getOperands()) + for (Value ctl_operand : island.getOperands()) state.operands.push_back(ctl_operand); } @@ -178,7 +141,7 @@ void ExecutorToControlDialectConversion::runOnFunction() { for (auto ops_and_ret_vals : llvm::zip(wrapped_op.getResults(), replacement->getResults())) std::get<0>(ops_and_ret_vals) - ->replaceAllUsesWith(std::get<1>(ops_and_ret_vals)); + .replaceAllUsesWith(std::get<1>(ops_and_ret_vals)); ctl_sequence = replacement->getResult(replacement->getNumResults() - 1); } @@ -188,12 +151,13 @@ void ExecutorToControlDialectConversion::runOnFunction() { // been rewritten from ops in island. Last op rewritten must logically // carry // all the island control inputs, we can simply use it to // replace all uses of island's control output. - island.control()->replaceAllUsesWith(ctl_sequence); - } else { - // Getting here means island had an effectively empty body. In this - // case, island's control output should be replaced with all the control - // inputs of island. - ReplaceAllUsesOfValueWithValues(island.control(), island.getOperands()); + island.control().replaceAllUsesWith(ctl_sequence); + } else if (island.getNumOperands() > 0) { + // Getting here means island had an effectively empty body and there is + // just one control input. In this case, island's control output should + // be replaced with the control input. + assert(island.getNumOperands() == 1); + island.control().replaceAllUsesWith(island.getOperand(0)); } op.erase(); @@ -228,7 +192,7 @@ void ExecutorToControlDialectConversion::runOnFunction() { // dialect. auto non_null_operands = llvm::make_filter_range( op.getOperands(), - [](Value *v) { return !v->getType().isa(); }); + [](Value v) { return !v.getType().isa(); }); state.operands.append(non_null_operands.begin(), non_null_operands.end()); for (Type result_type : op.getResultTypes()) { // Filter out TokenType, they don't exist in the control dialect. 
@@ -248,14 +212,14 @@ void ExecutorToControlDialectConversion::runOnFunction() { if (auto next_iteration = dyn_cast(op)) { - next_iteration.output()->replaceAllUsesWith(replacement->getResult(0)); - next_iteration.token()->dropAllUses(); - next_iteration.control()->replaceAllUsesWith(replacement->getResult(1)); + next_iteration.output().replaceAllUsesWith(replacement->getResult(0)); + next_iteration.token().dropAllUses(); + next_iteration.control().replaceAllUsesWith(replacement->getResult(1)); } else { for (auto ops_and_ret_vals : llvm::zip(op.getResults(), replacement->getResults())) std::get<0>(ops_and_ret_vals) - ->replaceAllUsesWith(std::get<1>(ops_and_ret_vals)); + .replaceAllUsesWith(std::get<1>(ops_and_ret_vals)); } op.erase(); } diff --git a/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.cc b/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.cc index 9d572209b31..39698c0f96b 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.cc @@ -27,18 +27,19 @@ limitations under the License. #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Casting.h" -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/Identifier.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassManager.h" // TF:local_config_mlir -#include "mlir/Support/DebugStringHelper.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/Identifier.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassManager.h" // TF:llvm-project +#include "mlir/Support/DebugStringHelper.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/control_flow_ops.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "tensorflow/compiler/mlir/tensorflow/translate/export_tf_dialect_op.h" @@ -110,25 +111,28 @@ std::string LegalizeNodeName(llvm::StringRef name) { return legalized_name; } -// TODO(jpienaar): unify and move from here to be able to reuse with tflite -std::string GetName(Operation* inst) { - // TODO(prakalps): b/137006652 prevents us from using location info (derived - // from experimental_debug_info) to generate node names. Until it is fixed, - // first check for "name" attribute to get node name. - - // Default name is Operation type. 
- auto name = inst->getName().getStringRef(); - if (auto attr = inst->getAttrOfType("name")) { - name = attr.getValue(); - } else if (auto name_loc = inst->getLoc().dyn_cast()) { - name = name_loc.getName().strref(); - } else if (auto call_loc = inst->getLoc().dyn_cast()) { +llvm::StringRef GetNameFromLoc(mlir::Location loc, + llvm::StringRef default_name) { + if (auto name_loc = loc.dyn_cast()) { + return name_loc.getName().strref().split('@').first; + } else if (auto call_loc = loc.dyn_cast()) { // Return name if CallSiteLoc's callee has a NameLoc (as should be the case // if imported with DebugInfo), else use the fallback naming scheme below. if (auto name_loc = call_loc.getCallee().dyn_cast()) - name = name_loc.getName().strref(); + return name_loc.getName().strref().split('@').first; + } else if (auto fused_loc = loc.dyn_cast()) { + // According to the importer, the last location of a fused location is + // the name from the node_def and the rest are from the experimental debug + // info. + return GetNameFromLoc(fused_loc.getLocations().back(), default_name); } + return default_name; +} +// TODO(jpienaar): unify and move from here to be able to reuse with tflite +std::string GetName(Operation* inst) { + // Default name is Operation type. + auto name = GetNameFromLoc(inst->getLoc(), inst->getName().getStringRef()); return LegalizeNodeName(name); } @@ -161,7 +165,7 @@ class Exporter { explicit Exporter(Graph* graph, const Dialect* tf_dialect) : graph_(graph), tf_dialect_(tf_dialect) {} - Status AddArgumentNode(BlockArgument* arg, unsigned index, + Status AddArgumentNode(BlockArgument arg, unsigned index, llvm::StringRef name); Status AddReturnNode(mlir::ReturnOp op, llvm::ArrayRef names); @@ -169,7 +173,7 @@ class Exporter { Status AddNextIterationNode(Operation* inst); Status AddEdge(Operation* inst); - StatusOr> GetArgumentNode(BlockArgument* arg, + StatusOr> GetArgumentNode(BlockArgument arg, unsigned index, llvm::StringRef name); StatusOr> GetReturnNode(Operation* inst, @@ -177,7 +181,7 @@ class Exporter { llvm::StringRef name); // Adds one edge between src_node and dst_node. If it is not a control edge, // an index is used to find out the right operand of the dst_node. - Status AddEdgeBetweenNodes(Value* src, Node* dst_node, unsigned dst_index); + Status AddEdgeBetweenNodes(Value src, Node* dst_node, unsigned dst_index); // Returns a unique name for `op`. std::string UniqueName(Operation* op); @@ -189,7 +193,7 @@ class Exporter { absl::flat_hash_map op_to_name_; absl::flat_hash_map name_to_count_; absl::flat_hash_map nodes_; - absl::flat_hash_map args_; + llvm::DenseMap args_; // One single return operation can return multiple results, and each of them // will be converted to one node in the graph.
typedef absl::InlinedVector NodeVector; @@ -231,8 +235,8 @@ std::string Exporter::UniqueName(Operation* op) { } StatusOr> Exporter::GetArgumentNode( - BlockArgument* arg, unsigned index, llvm::StringRef name) { - auto func = arg->getParentRegion()->getParentOfType(); + BlockArgument arg, unsigned index, llvm::StringRef name) { + auto func = arg.getParentRegion()->getParentOfType(); auto node_def = absl::make_unique(); if (!name.empty()) @@ -244,7 +248,7 @@ StatusOr> Exporter::GetArgumentNode( DataType dtype; TF_RETURN_IF_ERROR(ConvertToDataType( - arg->getType().cast().getElementType(), &dtype)); + arg.getType().cast().getElementType(), &dtype)); AttrValue type_attr; type_attr.set_type(dtype); (*node_def->mutable_attr())["T"] = type_attr; @@ -279,10 +283,10 @@ StatusOr> Exporter::GetReturnNode( UniqueName(inst->getParentOfType().getName().str())); node_def->set_op(FunctionLibraryDefinition::kRetOp); - auto* inst_op = inst->getOperand(index); + auto inst_op = inst->getOperand(index); DataType dtype; TF_RETURN_IF_ERROR(ConvertToDataType( - inst_op->getType().cast().getElementType(), &dtype)); + inst_op.getType().cast().getElementType(), &dtype)); AttrValue type_attr; type_attr.set_type(dtype); (*node_def->mutable_attr())["T"] = type_attr; @@ -292,10 +296,10 @@ StatusOr> Exporter::GetReturnNode( return node_def; } -Status Exporter::AddEdgeBetweenNodes(Value* src, Node* dst_node, +Status Exporter::AddEdgeBetweenNodes(Value src, Node* dst_node, unsigned dst_index) { - if (auto* input_result = dyn_cast(src)) { - auto* input_inst = input_result->getOwner(); + if (auto input_result = src.dyn_cast()) { + auto* input_inst = input_result.getOwner(); // replaces the input node by the sink one if it is a NextIteration source: auto it = source_to_sink_.find(input_inst); if (it != source_to_sink_.end()) { @@ -304,16 +308,16 @@ Status Exporter::AddEdgeBetweenNodes(Value* src, Node* dst_node, auto node_it = nodes_.find(input_inst); TF_RET_CHECK(node_it != nodes_.end()) << "Use of OpResult encountered before def!"; - if (input_result->getType().isa()) { + if (input_result.getType().isa()) { graph_->AddControlEdge(node_it->second, dst_node); } else { - graph_->AddEdge(node_it->second, input_result->getResultNumber(), - dst_node, dst_index); + graph_->AddEdge(node_it->second, input_result.getResultNumber(), dst_node, + dst_index); } return Status::OK(); } - auto* input_arg = cast(src); + auto input_arg = src.cast(); auto input_node_it = args_.find(input_arg); TF_RET_CHECK(input_node_it != args_.end()) << "Use of BlockArgument encountered before def!"; @@ -326,7 +330,7 @@ Status Exporter::AddEdge(Operation* inst) { auto* dst_node = nodes_[inst]; bool is_return_op = isa(inst); for (int index = 0, e = inst->getNumOperands(); index < e; index++) { - auto* src = inst->getOperand(index); + auto src = inst->getOperand(index); // For return operation, the edge is from the operand owner to one of the // faked return nodes. The input index is always 0 for the return node. if (is_return_op) { @@ -361,14 +365,14 @@ Status Exporter::AddInstructionNode(Operation* inst) { return Status::OK(); } -bool IsEntryFunctionArg(BlockArgument* arg) { - return arg->getParentRegion()->getParentOfType().getName() == +bool IsEntryFunctionArg(BlockArgument arg) { + return arg.getParentRegion()->getParentOfType().getName() == "main"; } // Creates argument nodes from Block argument. If a name is supplied, that // name will be used instead of generating a unique name.
-Status Exporter::AddArgumentNode(BlockArgument* arg, unsigned index, +Status Exporter::AddArgumentNode(BlockArgument arg, unsigned index, llvm::StringRef name) { if (!IsEntryFunctionArg(arg) || !name.empty()) { TF_ASSIGN_OR_RETURN(auto node_def, GetArgumentNode(arg, index, name)); @@ -383,21 +387,21 @@ Status Exporter::AddArgumentNode(BlockArgument* arg, unsigned index, // is an input node. We recover the original input node and skip adding the // argument node. The new input node will be handled as normal in the // following steps. - if (!arg->hasOneUse()) { + if (!arg.hasOneUse()) { return errors::FailedPrecondition( "Arg in 'main' should only have one user."); } - auto* input = *arg->user_begin(); + auto* input = *arg.user_begin(); auto input_name = input->getName().getStringRef(); input_name.consume_back(".input"); - mlir::OpBuilder builder(arg->getOwner()); + mlir::OpBuilder builder(arg.getOwner()); auto loc = mlir::NameLoc::get(builder.getIdentifier(UniqueName(input)), builder.getContext()); OperationState state(loc, input_name.str()); state.attributes.append(input->getAttrs().begin(), input->getAttrs().end()); - for (auto* op : input->getOperands()) { + for (auto op : input->getOperands()) { // Skip the argument in the new operation. - if (llvm::isa(op)) continue; + if (op.isa()) continue; state.operands.push_back(op); } state.types.append(input->getResultTypes().begin(), @@ -405,9 +409,17 @@ Status Exporter::AddArgumentNode(BlockArgument* arg, unsigned index, auto* inst = builder.createOperation(state); // If it is one of the specified input names, then the new // instruction should have the same name. - op_to_name_[inst].assign(op_to_name_[input]); + auto& mapped_name = op_to_name_[inst]; + const auto& input_mapped_name = op_to_name_[input]; + DCHECK(mapped_name.empty()) + << "AddArgumentNode() attempted to change the op_to_name_ mapping for " + << inst << " from " << mapped_name << " to " << input_mapped_name << "."; + DCHECK(!input_mapped_name.empty()) + << "AddArgumentNode() attempted to set the op_to_name_ mapping for " + << inst << " to an empty string."; + mapped_name.assign(input_mapped_name); for (int index : llvm::seq(0, input->getNumResults())) { - input->getResult(index)->replaceAllUsesWith(inst->getResult(index)); + input->getResult(index).replaceAllUsesWith(inst->getResult(index)); } input->dropAllReferences(); input->erase(); @@ -511,9 +523,15 @@ StatusOr> Exporter::Convert( // Only assign the output names to the defining ops of the return operands // if the main graph did not have its _Retval nodes lifted into the function's // returns. - if (!graph_as_function) - exporter.op_to_name_[it.value()->getDefiningOp()] = - output_names[it.index()]; + if (!graph_as_function) { + auto defining_op = it.value().getDefiningOp(); + auto& mapped_name = exporter.op_to_name_[defining_op]; + DCHECK(mapped_name.empty()) + << "Convert() attempted to change the op_to_name_ mapping for " + << defining_op << " from " << mapped_name << " to output " + << it.index() << " name " << output_names[it.index()].str() << "."; + mapped_name = output_names[it.index()]; + } } } if (!input_names.empty()) { @@ -522,17 +540,23 @@ StatusOr> Exporter::Convert( exporter.name_to_count_[input_names[it.index()].str()] = 1; // Only assign the input name to the user of the argument if the main graph // did not have its _Arg nodes lifted into the function's arguments.
- if (!graph_as_function) - exporter.op_to_name_[*it.value()->user_begin()] = - input_names[it.index()]; + if (!graph_as_function) { + auto first_user = *it.value().user_begin(); + auto& mapped_name = exporter.op_to_name_[first_user]; + DCHECK(mapped_name.empty()) + << "Convert() attempted to change the op_to_name_ mapping for " + << first_user << " from " << mapped_name << " to input " + << it.index() << " name " << input_names[it.index()].str() << "."; + mapped_name = input_names[it.index()]; + } } } // Adds nodes for basic block (function) arguments. for (auto it : llvm::enumerate(block.getArguments())) { int index = it.index(); - auto* arg = it.value(); - mlir::Type type = arg->getType(); + auto arg = it.value(); + mlir::Type type = arg.getType(); if (!type.isa()) { return errors::InvalidArgument( "FuncOps arguments must have tensor types. Found ", diff --git a/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.h b/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.h index ab9b9731ab4..71ef3c8c493 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.h +++ b/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.h @@ -17,9 +17,9 @@ limitations under the License. #define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSLATE_EXPORT_GRAPHDEF_H_ #include "llvm/ADT/StringRef.h" -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_flags.h" #include "tensorflow/core/framework/function.h" #include "tensorflow/core/framework/graph.pb.h" diff --git a/tensorflow/compiler/mlir/tensorflow/translate/export_tf_dialect_op.cc b/tensorflow/compiler/mlir/tensorflow/translate/export_tf_dialect_op.cc index adb5ba2b569..8cc12869704 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/export_tf_dialect_op.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/export_tf_dialect_op.cc @@ -83,11 +83,10 @@ Status GetUnregisteredAttrs( TF_ASSIGN_OR_RETURN(auto op_name, GetTensorFlowOpName(inst->getName().getStringRef())); - const tensorflow::OpRegistrationData* op_reg_data; - auto status = tensorflow::OpRegistry::Global()->LookUp(op_name, &op_reg_data); - if (!status.ok()) { + const tensorflow::OpRegistrationData* op_reg_data = + tensorflow::OpRegistry::Global()->LookUp(op_name); + if (!op_reg_data) { // This is likely a function call node, so we should continue. - VLOG(1) << status.ToString(); return Status::OK(); } @@ -132,8 +131,8 @@ StatusOr> ConvertTFDialectOpToNodeDef( if (inst->getDialect() && inst->getDialect()->getNamespace() == "_tf") { mlir::OperationState result(inst->getLoc(), inst->getName().getStringRef().drop_front()); - for (mlir::Value* operand : inst->getOperands()) - if (!operand->getType().isa()) + for (mlir::Value operand : inst->getOperands()) + if (!operand.getType().isa()) result.operands.push_back(operand); // Add a result type for each non-control result we find @@ -161,6 +160,13 @@ StatusOr> ConvertTFDialectOpToNodeDef( TF_RETURN_IF_ERROR(GetUnregisteredAttrs(inst, &attrs_to_ignore)); } + if (inst->hasTrait()) { + // TODO(b/146937733): Don't use here. 
+ llvm::StringRef attr_name = mlir::OpTrait::AttrSizedResultSegments< + void>::getResultSegmentSizeAttr(); + attrs_to_ignore.insert(attr_name.data()); + } + TF_ASSIGN_OR_RETURN(auto node_def, GetOperationNodeDef(attrs_to_ignore, inst, name)); diff --git a/tensorflow/compiler/mlir/tensorflow/translate/export_tf_dialect_op.h b/tensorflow/compiler/mlir/tensorflow/translate/export_tf_dialect_op.h index 1e18a2d5d3b..df1f4859ded 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/export_tf_dialect_op.h +++ b/tensorflow/compiler/mlir/tensorflow/translate/export_tf_dialect_op.h @@ -17,7 +17,7 @@ limitations under the License. #define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSLATE_EXPORT_TF_DIALECT_OP_H_ #include "llvm/ADT/StringRef.h" -#include "mlir/IR/Operation.h" // TF:local_config_mlir +#include "mlir/IR/Operation.h" // TF:llvm-project #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/stream_executor/lib/statusor.h" diff --git a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc index 868faed9b0b..0f258495f47 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc @@ -35,20 +35,22 @@ limitations under the License. #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/raw_ostream.h" -#include "mlir/Analysis/Verifier.h" // TF:local_config_mlir -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/Identifier.h" // TF:local_config_mlir -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir +#include "mlir/Analysis/Verifier.h" // TF:llvm-project +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/Identifier.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/IR/OpDefinition.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project #include "tensorflow/compiler/jit/shape_inference_helpers.h" #include "tensorflow/compiler/mlir/op_or_arg_name_mapper.h" #include "tensorflow/compiler/mlir/tensorflow/ir/control_flow_ops.h" @@ -70,6 +72,7 @@ limitations under the License. #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/node_def_util.h" #include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/resource_var.h" #include "tensorflow/core/framework/shape_inference.h" #include "tensorflow/core/framework/tensor.pb.h" #include "tensorflow/core/framework/types.h" @@ -80,6 +83,7 @@ limitations under the License. 
#include "tensorflow/core/graph/graph_constructor.h" #include "tensorflow/core/graph/node_builder.h" #include "tensorflow/core/graph/tensor_id.h" +#include "tensorflow/core/grappler/utils/transitive_fanin.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/protobuf.h" @@ -264,7 +268,7 @@ class ImporterBase { mlir::Operation* createOperation( const Node& node, llvm::StringRef node_type_name, const mlir::OperationState& result, - const llvm::SmallVectorImpl& control_operands, + const llvm::SmallVectorImpl& control_operands, bool convert_to_legacy_call = false); // Converts one NodeDef from the input GraphDef into an Operation and @@ -421,7 +425,6 @@ Status UpdateLegacyFedInputNode(const GraphDef& graph_def, // - Replacing LegacyFedInput nodes with Placeholder nodes if // convert_legacy_fed_inputs option is enabled. Status PreprocessGraphDef(const GraphImportConfig* specs, GraphDef* graph_def) { - const tensorflow::OpRegistrationData* op_reg_data; for (auto& node_def : *graph_def->mutable_node()) { // TODO(hinsu): Completely deprecate support for LegacyFedInput ops. One // solution could be have a tool to let users upgrade old serialized graphs. @@ -431,11 +434,10 @@ Status PreprocessGraphDef(const GraphImportConfig* specs, GraphDef* graph_def) { UpdateLegacyFedInputNode(*graph_def, specs->inputs, &node_def)); } - auto status = - tensorflow::OpRegistry::Global()->LookUp(node_def.op(), &op_reg_data); - if (!status.ok()) { + const tensorflow::OpRegistrationData* op_reg_data = + tensorflow::OpRegistry::Global()->LookUp(node_def.op()); + if (!op_reg_data) { // This is likely a function call node, so we should continue. - VLOG(1) << status.ToString(); continue; } ::tensorflow::AddDefaultsToNodeDef(op_reg_data->op_def, &node_def); @@ -1176,7 +1178,7 @@ Status ImporterBase::ConvertFunctionArgAndRets( const absl::InlinedVector& ret_nodes, const absl::InlinedVector& control_ret_nodes) { auto* bb = &func.front(); - llvm::SmallDenseMap, mlir::Value*, 4> + llvm::SmallDenseMap, mlir::Value, 4> arg_nodes_to_values; for (int i = 0, e = arg_types.size(); i < e; ++i) { auto& arg_node = arg_nodes[i]; @@ -1184,8 +1186,8 @@ Status ImporterBase::ConvertFunctionArgAndRets( // be converted to mlir operations and don't have a mapping. mlir::Operation* island = node_values_.find(arg_node.node->id())->second; - auto* bb_arg = bb->getArgument(i); - mlir::Value* arg_def = bb_arg; + auto bb_arg = bb->getArgument(i); + mlir::Value arg_def = bb_arg; if (island->getNumResults() != 2) return errors::InvalidArgument( @@ -1193,9 +1195,9 @@ Status ImporterBase::ConvertFunctionArgAndRets( // Collect mapping of OutputTensor to associated block arg. arg_nodes_to_values.try_emplace({arg_node.node, arg_node.index}, arg_def); - island->getResult(0)->replaceAllUsesWith(arg_def); + island->getResult(0).replaceAllUsesWith(arg_def); // Erase control outputs from feed. 
- auto control_uses = island->getResult(1)->getUses(); + auto control_uses = island->getResult(1).getUses(); for (auto& control_use : llvm::make_early_inc_range(control_uses)) control_use.getOwner()->eraseOperand(control_use.getOperandNumber()); @@ -1208,7 +1210,7 @@ Status ImporterBase::ConvertFunctionArgAndRets( island->erase(); } - llvm::SmallVector inst_to_return; + llvm::SmallVector inst_to_return; for (const auto& ret : ret_nodes) { auto* inst = node_values_[ret.node->id()]; auto op = absl::string_view(ret.node->type_string()); @@ -1320,15 +1322,21 @@ mlir::Location ImporterBase::GetLocation(const NodeDef& node_def) { return create_location(node_def.name(), function_name_for_debug_info_); } else { // If the original nodes are defined, then we use them to get a list of - // call sites, and then fuse them to a single fused location. - llvm::SmallVector node_call_sites; - node_call_sites.reserve(original_nodes.size()); + // call sites, and then fuse them to a single fused location, with the name + // of the node_def. + llvm::SmallVector node_locations; + node_locations.reserve(original_nodes.size() + 1); + + // store the names in the experimental_debug_info for (int i = 0, e = original_nodes.size(); i != e; ++i) { auto node_name = original_nodes[i]; auto func_name = (i < original_funcs.size()) ? original_funcs[i] : ""; - node_call_sites.push_back(create_location(node_name, func_name)); + node_locations.push_back(create_location(node_name, func_name)); } - return mlir::FusedLoc::get(node_call_sites, context_); + // store the name of the node_def + node_locations.push_back( + create_location(node_def.name(), function_name_for_debug_info_)); + return mlir::FusedLoc::get(node_locations, context_); } } @@ -1349,14 +1357,14 @@ std::string ImporterBase::GetLocationStr(const Node& node, mlir::Operation* ImporterBase::createOperation( const Node& node, llvm::StringRef node_type_name, const mlir::OperationState& result, - const llvm::SmallVectorImpl& control_operands, + const llvm::SmallVectorImpl& control_operands, bool convert_to_legacy_call) { // For the tf.executor specific operations (not wrapped in an island), we // have an extra returned value for the control result, and we concatenate // control and non-control operands. mlir::SmallVector types(result.types); types.push_back(mlir::tf_executor::ControlType::get(builder_.getContext())); - mlir::SmallVector operands(result.operands); + mlir::SmallVector operands(result.operands); operands.append(control_operands.begin(), control_operands.end()); auto loc = result.location; @@ -1384,7 +1392,7 @@ mlir::Operation* ImporterBase::createOperation( builder_.getBlock()->begin()); auto source_op = builder_at_begin.create( - loc, operands[0]->getType(), result.attributes); + loc, operands[0].getType(), result.attributes); return builder_.create( loc, source_op.token(), operands, result.attributes); } @@ -1434,6 +1442,32 @@ mlir::Operation* ImporterBase::createOperation( inner_op = island_builder.createOperation(result); } + if (inner_op->hasTrait()) { + // The op has multiple variadic outputs. + // Calculate result segment sizes using the OpDef. + NameRangeMap output_ranges; + // This will fail only if the OpDef is syntactically invalid. + // TODO(jpienaar): Convert this CHECK into a properly propagated error. 
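+ // Illustration (hypothetical op, not from this patch): if the OpDef
+ // declares outputs (values: N * T, indices: int32) and the node was
+ // instantiated with N = 3, NameRangesForNode reports the output ranges
+ // {"values": [0, 3), "indices": [3, 4)}, so the loop below collects the
+ // segment sizes {3, 1} for the derived result_segment_sizes attribute.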
+ TF_CHECK_OK( + NameRangesForNode(node, node.op_def(), nullptr, &output_ranges)); + std::vector values; + values.reserve(node.op_def().output_arg_size()); + for (const auto& output_arg : node.op_def().output_arg()) { + auto range = output_ranges[output_arg.name()]; + values.push_back( + island_builder.getI32IntegerAttr(range.second - range.first)); + } + + // Add derived "result_segment_sizes" attr to the created operation. + // TODO(b/146937733): Don't use here. + llvm::StringRef attr_name = mlir::OpTrait::AttrSizedResultSegments< + void>::getResultSegmentSizeAttr(); + auto attr_type = mlir::VectorType::get(node.op_def().output_arg_size(), + builder_.getIntegerType(32)); + auto attr_value = mlir::DenseElementsAttr::get(attr_type, values); + inner_op->setAttr(attr_name, attr_value); + } + // Add the terminator for the island island_builder.create(result.location, inner_op->getResults()); @@ -1499,7 +1533,7 @@ Status ImporterBase::ConvertNode(const Node& node) { result.operands.reserve(in_edges.size()); // Collect the control operands separately, they will be held by the island. - mlir::SmallVector control_operands; + mlir::SmallVector control_operands; for (const auto* input_edge : in_edges) { const Node& input_node = *input_edge->src(); @@ -1568,8 +1602,6 @@ Status ImporterBase::ConvertNode(const Node& node) { &result.attributes)); } - result.attributes.push_back(builder_.getNamedAttr( - "name", builder_.getStringAttr(std::string(node.name())))); result.attributes.push_back(builder_.getNamedAttr( "device", builder_.getStringAttr(std::string(node_def.device())))); @@ -1625,7 +1657,7 @@ Status ImporterBase::AddBackedges() { Status ImporterBase::AddBackedge(mlir::Operation* sink, mlir::Operation* dst, int dst_input) { // Get the NextIteration.Source operation from the token operand of the sink. - mlir::Operation* source = sink->getOperand(0)->getDefiningOp(); + mlir::Operation* source = sink->getOperand(0).getDefiningOp(); // Adds the "source" to the operands of the dst by creating a new dst // operation. @@ -1650,8 +1682,8 @@ Status ImporterBase::AddBackedge(mlir::Operation* sink, mlir::Operation* dst, // Replaces the output uses of the old operation by the corresponding // result of the new operation, and deletes the old operation. 
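// For example, if `dst` is the Merge operation of a while loop, `new_dst`
// is identical except for the extra NextIteration source operand; every use
// of each old result is redirected to the matching new result before `dst`
// is erased.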
for (unsigned i = 0, e = dst->getNumResults(); i != e; ++i) { - auto* new_output = new_dst->getResult(i); - dst->getResult(i)->replaceAllUsesWith(new_output); + auto new_output = new_dst->getResult(i); + dst->getResult(i).replaceAllUsesWith(new_output); } dst->dropAllReferences(); dst->erase(); @@ -1705,8 +1737,8 @@ class GraphDefImporter : public ImporterBase { static StatusOr Convert( mlir::MLIRContext* context, const Graph& graph, const GraphDebugInfo& debug_info, - const FunctionLibraryDefinition& flib_def, - const GraphImportConfig& specs); + const FunctionLibraryDefinition& flib_def, const GraphImportConfig& specs, + llvm::StringRef func_name); private: explicit GraphDefImporter( @@ -1744,7 +1776,7 @@ class GraphDefImporter : public ImporterBase { StatusOr GraphDefImporter::Convert( mlir::MLIRContext* context, const Graph& graph, const GraphDebugInfo& debug_info, const FunctionLibraryDefinition& flib_def, - const GraphImportConfig& specs) { + const GraphImportConfig& specs, llvm::StringRef func_name) { mlir::OwningModuleRef module = mlir::ModuleOp::create(mlir::UnknownLoc::get(context)); std::unordered_map tf_name_to_mlir_name; @@ -1832,7 +1864,7 @@ StatusOr GraphDefImporter::Convert( {producer, min_consumer, bad_consumers}))); TF_RETURN_IF_ERROR(importer.ImporterBase::Convert( - "main", func_type, arg_nodes, ret_nodes, control_ret_nodes, attrs, + func_name, func_type, arg_nodes, ret_nodes, control_ret_nodes, attrs, resource_arg_unique_ids)); return module; } @@ -2535,7 +2567,7 @@ Status CreateSavedModelIR( module.insert(module.getBody()->begin(), func); func.addEntryBlock(); func.setName("__sm_exported_" + orig_func.getName().str()); - llvm::SmallVector args_as_values; + llvm::SmallVector args_as_values; for (auto block_argument : func.getArguments()) { args_as_values.push_back(block_argument); } @@ -2742,6 +2774,292 @@ StatusOr SavedModelImporter::Convert( return module; } +// A helper class to import a TensorFlow model expressed in SavedModel V1 into +// an MLIR Module. +class SavedModelV1Importer { + public: + // Main entry point: converts all functions (specified by SignatureDefs) in + // the given meta graph to an MLIR Module. + static StatusOr Convert(const SavedModelBundle& bundle, + mlir::MLIRContext* context) { + SavedModelV1Importer importer(bundle, context); + + return importer.ConvertSignatures(); + } + + private: + SavedModelV1Importer(const SavedModelBundle& bundle, + mlir::MLIRContext* context) + : bundle_(bundle), + module_(mlir::ModuleOp::create(mlir::UnknownLoc::get(context))) {} + + // Convert the SavedModel to TF Executor Dialect. It creates an MLIR function + // for each signature. + StatusOr ConvertSignatures(); + StatusOr ConvertSignature( + const GraphImportConfig& specs, llvm::StringRef func_name, + const SignatureDef& signature_def, const GraphDef& sub_graph_def, + const GraphDebugInfo& debug_info, + const FunctionLibraryDefinition& flib_def); + + // Create GlobalTensorOp for each variable and move each VarHandle op to + // the enclosing function's arguments. + Status LiftVariables(); + void LiftVariable(mlir::TF::VarHandleOp op); + + // Read all variables from the SavedModel through the session, and create + // GlobalTensorOp for these variables.
+ Status ReadVariablesFromSession( + const llvm::SmallVectorImpl& ops); + + GraphImportConfig::InputArrays ParseInputArrays( + const tensorflow::protobuf::Map& inputs); + + std::vector ParseOutputArrays( + const tensorflow::protobuf::Map& outputs); + + const SavedModelBundle& bundle_; + mlir::OwningModuleRef module_; +}; + +// Convert the SavedModel to TF Executor Dialect. It creates an MLIR function +// for each signature. +StatusOr SavedModelV1Importer::ConvertSignatures() { + const auto& signatures = bundle_.GetSignatures(); + const auto& graphdef = bundle_.meta_graph_def.graph_def(); + + FunctionLibraryDefinition flib_def(OpRegistry::Global(), graphdef.library()); + + // debug_info might not be loaded with loader_lite. + GraphDebugInfo debug_info; + if (bundle_.debug_info != nullptr) debug_info = *bundle_.debug_info; + + for (const auto& key_and_signature_def : signatures) { + const auto& func_name = key_and_signature_def.first; + const auto& signature_def = key_and_signature_def.second; + GraphImportConfig specs; + specs.inputs = ParseInputArrays(signature_def.inputs()); + specs.outputs = ParseOutputArrays(signature_def.outputs()); + + // Remove unused nodes and create a sub graphdef. + GraphDef sub_graph_def; + TF_RETURN_IF_ERROR(tensorflow::grappler::SetTransitiveFaninGraph( + graphdef, &sub_graph_def, + /* terminal_nodes = */ {specs.outputs.begin(), specs.outputs.end()})); + + auto status_or_sub_module = ConvertSignature( + specs, func_name, signature_def, sub_graph_def, debug_info, flib_def); + if (!status_or_sub_module.ok()) { + LOG(ERROR) << "Failed to convert SignatureDef for " << func_name << ": " + << status_or_sub_module.status(); + continue; + } + + auto& sub_module = status_or_sub_module.ValueOrDie(); + + // Move the converted functions to the top-level MLIR module. + auto* block = module_->getBody(); + auto* sub_block = sub_module->getBody(); + block->getOperations().splice( + mlir::Block::iterator(block->getTerminator()), + sub_block->getOperations(), sub_block->begin(), + mlir::Block::iterator(sub_block->getTerminator())); + } + + TF_RETURN_IF_ERROR(LiftVariables()); + + return std::move(module_); +} + +StatusOr SavedModelV1Importer::ConvertSignature( + const GraphImportConfig& specs, llvm::StringRef func_name, + const SignatureDef& signature_def, const GraphDef& sub_graph_def, + const GraphDebugInfo& debug_info, + const FunctionLibraryDefinition& flib_def) { + // Convert this sub graphdef to a sub graph. + GraphConstructorOptions options; + options.allow_internal_ops = true; + options.add_default_attributes = true; + Graph sub_graph(OpRegistry::Global()); + + TF_RETURN_IF_ERROR( + ConvertGraphDefToGraph(options, sub_graph_def, &sub_graph)); + + // Convert the sub graphdef to an MLIR function. + return GraphDefImporter::Convert(module_->getContext(), sub_graph, debug_info, + flib_def, specs, func_name); +} + +// Create GlobalTensorOp for each variable and move each VarHandle op to +// the enclosing function's arguments.
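+// For instance (names illustrative), a signature function
+// @serve(%x: tensor<f32>) that reads a variable through a tf.VarHandleOp
+// with shared_name = "w" becomes
+// @serve(%x: tensor<f32>, %w: tensor<!tf.resource>), where the new argument
+// carries a tf_saved_model.bound_input attribute referencing the
+// tf_saved_model.global_tensor created for "w".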
+Status SavedModelV1Importer::LiftVariables() { + llvm::SmallVector ops; + + bool contains_ref_variable = false; + + module_->walk([&ops, &contains_ref_variable](mlir::Operation* op) { + if (auto var_handle_op = llvm::dyn_cast(op)) + ops.push_back(var_handle_op); + else if (op->getName().getStringRef() == "tf.VariableV2") + contains_ref_variable = true; + }); + + if (contains_ref_variable) + return errors::InvalidArgument( + "Ref variable created by VariableV2 is not supported."); + + if (ops.empty()) return Status::OK(); + + TF_RETURN_IF_ERROR(ReadVariablesFromSession(ops)); + + for (auto op : ops) LiftVariable(op); + + return Status::OK(); +} + +// Move the result of the VarHandleOp to the enclosing function's argument list +// and erase this VarHandleOp. +void SavedModelV1Importer::LiftVariable(mlir::TF::VarHandleOp op) { + mlir::OpBuilder builder(&module_->getBodyRegion()); + + auto func_op = op.getParentOfType(); + builder.setInsertionPoint(func_op); + + auto func_type = func_op.getType(); + + // Create the new function type by adding the variable type to the arguments. + llvm::SmallVector new_input_types( + func_type.getInputs().begin(), func_type.getInputs().end()); + new_input_types.push_back(op.resource()->getType()); + auto new_func_type = + builder.getFunctionType(new_input_types, func_type.getResults()); + + auto new_func_op = builder.create( + func_op.getLoc(), func_op.getName(), new_func_type, + llvm::ArrayRef()); + + // Bind the argument to the corresponding global tensor op. + new_func_op.setArgAttr(new_func_op.getNumArguments() - 1, + "tf_saved_model.bound_input", + builder.getSymbolRefAttr(op.shared_name())); + + // Replace the function body and update its signature. + auto& new_region = new_func_op.getBody(); + new_region.getBlocks().splice(new_region.end(), + func_op.getBody().getBlocks()); + + func_op.getOperation()->erase(); + + auto& new_block = new_region.front(); + auto new_value = new_block.addArgument(op.resource()->getType()); + + op.getOperation()->replaceAllUsesWith(llvm::ArrayRef(new_value)); + + op.getOperation()->erase(); +} + +// Read all variables from the SavedModel through the session, and create +// GlobalTensorOp for these variables. +Status SavedModelV1Importer::ReadVariablesFromSession( + const llvm::SmallVectorImpl& ops) { + mlir::OpBuilder builder(&module_->getBodyRegion()); + + // Find all variables and their corresponding read ops. + + llvm::MapVector + variable_names_and_ops; + for (auto op : ops) { + variable_names_and_ops[op.shared_name()] = op; + } + + // Read all resource variables from the session. + + std::vector variable_names; + variable_names.reserve(variable_names_and_ops.size()); + for (const auto& name_and_location : variable_names_and_ops) + variable_names.push_back(name_and_location.first); + + std::vector resource_tensors; + TF_RETURN_IF_ERROR(bundle_.GetSession()->Run( + /*inputs=*/{}, variable_names, + /*target_node_names=*/{}, &resource_tensors)); + + const DeviceMgr* device_manager; + TF_RETURN_IF_ERROR(bundle_.GetSession()->LocalDeviceManager(&device_manager)); + + // Read all underlying tensors of the variables from the session.
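+ // The lookup chain used below: each fetched Tensor is a scalar
+ // ResourceHandle naming (device, container, name); the handle's device is
+ // resolved through the DeviceMgr, the Var is looked up in that device's
+ // ResourceMgr, and the Var's payload tensor is what gets materialized as a
+ // tf_saved_model.global_tensor.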
+ std::vector tensors; + tensors.reserve(resource_tensors.size()); + for (const auto& resource_tensor : resource_tensors) { + const auto& resource_handle = resource_tensor.scalar()(); + + Device* device; + TF_RETURN_IF_ERROR( + device_manager->LookupDevice(resource_handle.device(), &device)); + + Var* var_ptr; + TF_RETURN_IF_ERROR(device->resource_manager()->Lookup( + resource_handle.container(), resource_handle.name(), &var_ptr)); + core::RefCountPtr var(var_ptr); + + // The variable tensor is already loaded into the corresponding device's + // resource manager when we load the saved model using LoadSavedModel(). + // Here we just read its value. + mutex_lock ml(*var->mu()); + tensors.push_back(*var->tensor()); + } + + for (const auto& iter : llvm::zip(variable_names_and_ops, tensors)) { + const auto& name = std::get<0>(iter).first; + auto location = std::get<0>(iter).second.getLoc(); + const auto& tensor = std::get<1>(iter); + + // Create a tensor attribute for this variable. + TF_ASSIGN_OR_RETURN(auto tensor_attr, ConvertTensor(tensor, &builder)); + + builder.create( + location, builder.getStringAttr(name), tensor_attr, + mlir::TypeAttr::get(tensor_attr.getType()), builder.getUnitAttr()); + } + + return Status::OK(); +} + +GraphImportConfig::InputArrays SavedModelV1Importer::ParseInputArrays( + const tensorflow::protobuf::Map& inputs) { + GraphImportConfig::InputArrays results; + for (const auto& iter : inputs) { + const auto& tensor_info = iter.second; + + // Only dense tensors are supported. + DCHECK_EQ(tensor_info.encoding_case(), tensorflow::TensorInfo::kName); + + ArrayInfo array_info; + array_info.imported_dtype = tensor_info.dtype(); + array_info.shape = tensor_info.tensor_shape(); + + std::vector node_names = + absl::StrSplit(tensor_info.name(), ':'); + + results.insert(std::pair(node_names.at(0), + std::move(array_info))); + } + return results; +} + +std::vector SavedModelV1Importer::ParseOutputArrays( + const tensorflow::protobuf::Map& outputs) { + std::vector results; + for (const auto& iter : outputs) { + const auto& tensor_info = iter.second; + + std::vector node_names = + absl::StrSplit(tensor_info.name(), ':'); + results.push_back(node_names.at(0)); + } + return results; +} + } // namespace Status UpgradeLegacyGraph(Graph* graph, FunctionLibraryDefinition* flib_def) { @@ -2777,7 +3095,8 @@ StatusOr ConvertGraphToMlir( UpgradeLegacyGraph(const_cast(&graph), const_cast(&flib_def))); } - return GraphDefImporter::Convert(context, graph, debug_info, flib_def, specs); + return GraphDefImporter::Convert(context, graph, debug_info, flib_def, specs, + /* func_name = */ "main"); } StatusOr ConvertSavedModelToMlir( @@ -2787,6 +3106,11 @@ StatusOr ConvertSavedModelToMlir( add_default_attributes); } +StatusOr ConvertSavedModelV1ToMlir( + const SavedModelBundle& saved_model, mlir::MLIRContext* context) { + return SavedModelV1Importer::Convert(saved_model, context); +} + std::string MlirModuleToString(mlir::ModuleOp module, bool show_debug_info) { std::string txt_module; { diff --git a/tensorflow/compiler/mlir/tensorflow/translate/import_model.h b/tensorflow/compiler/mlir/tensorflow/translate/import_model.h index d4b17073bd5..efc316483fe 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/import_model.h +++ b/tensorflow/compiler/mlir/tensorflow/translate/import_model.h @@ -18,9 +18,10 @@ limitations under the License.
#include -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project #include "tensorflow/cc/saved_model/bundle_v2.h" +#include "tensorflow/cc/saved_model/loader.h" #include "tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_flags.h" #include "tensorflow/core/framework/function.h" #include "tensorflow/core/framework/graph.pb.h" @@ -50,6 +51,12 @@ stream_executor::port::StatusOr ConvertSavedModelToMlir( SavedModelV2Bundle* saved_model, mlir::MLIRContext* context, absl::Span exported_names, bool add_default_attributes = true); +// Given a V1 SavedModel, returns a MLIR module containing the functions, +// expressed with tf_executor dialect. +stream_executor::port::StatusOr +ConvertSavedModelV1ToMlir(const SavedModelBundle& saved_model, + mlir::MLIRContext* context); + // Serialize a MLIR module to a string. std::string MlirModuleToString(mlir::ModuleOp m, bool show_debug_info = false); diff --git a/tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_pass.cc b/tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_pass.cc index ca13db56df3..004293410b3 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_pass.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_pass.cc @@ -15,9 +15,9 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_pass.h" -#include "mlir/Analysis/Verifier.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir +#include "mlir/Analysis/Verifier.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.h" #include "tensorflow/compiler/mlir/tensorflow/translate/import_model.h" #include "tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_flags.h" diff --git a/tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_pass.h b/tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_pass.h index 1daa29045c5..79a302b066b 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_pass.h +++ b/tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_pass.h @@ -16,7 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSLATE_MLIR_ROUNDTRIP_PASS_H_ #define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSLATE_MLIR_ROUNDTRIP_PASS_H_ -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project #include "tensorflow/core/common_runtime/optimization_registry.h" #include "tensorflow/core/lib/core/status.h" diff --git a/tensorflow/compiler/mlir/tensorflow/translate/tf_functional_to_executor.cc b/tensorflow/compiler/mlir/tensorflow/translate/tf_functional_to_executor.cc index 86fbff91db1..a97bca9fc3d 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/tf_functional_to_executor.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/tf_functional_to_executor.cc @@ -14,11 +14,11 @@ limitations under the License. 
==============================================================================*/ #include "llvm/Support/Debug.h" -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassRegistry.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassRegistry.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h" #define DEBUG_TYPE "tf-functional-to-executor" @@ -75,7 +75,7 @@ void FunctionalToExecutorDialectConversion::runOnFunction() { builder.setInsertionPointToEnd(&graph_op.GetBody()); auto island = builder.create( loc, getFunction().getType().getResults(), - tf_executor::ControlType::get(&getContext()), ArrayRef()); + tf_executor::ControlType::get(&getContext()), ArrayRef()); // Create Fetch. ValueRange to_fetch = island.getResults(); if (to_fetch.size() != 1) { diff --git a/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.cc b/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.cc index 5c59eace5cc..8f3cab0e619 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.cc @@ -17,14 +17,15 @@ limitations under the License. #include "absl/memory/memory.h" #include "llvm/Support/raw_ostream.h" -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/Identifier.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/Parser.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/Identifier.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/Parser.h" // TF:llvm-project +#include "tensorflow/cc/saved_model/bundle_v2.h" #include "tensorflow/compiler/mlir/tensorflow/translate/import_model.h" #include "tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_flags.h" #include "tensorflow/compiler/mlir/tensorflow/utils/error_util.h" @@ -129,6 +130,27 @@ mlir::OwningModuleRef SavedModelToMlirImport( return module_or.ConsumeValueOrDie(); } +mlir::OwningModuleRef SavedModelV1ToMlirImport( + absl::string_view saved_model_dir, + const std::unordered_set& tags, mlir::MLIRContext* context) { + tensorflow::SavedModelBundle bundle; + auto load_status = tensorflow::LoadSavedModel( + /* session_options = */ {}, /* run_options = */ {}, + std::string(saved_model_dir), tags, &bundle); + if (!load_status.ok()) { + LOG(ERROR) << "Failed to load saved model v1 '" << saved_model_dir + << "': " << load_status; + return nullptr; + } + + auto module_or = ConvertSavedModelV1ToMlir(bundle, context); + if (!module_or.status().ok()) { + LOG(ERROR) << "SavedModel V1 import failed: " << module_or.status(); + return nullptr; + 
} + return module_or.ConsumeValueOrDie(); +} + mlir::OwningModuleRef GraphdefToSplattedMlirTranslateFunction( llvm::StringRef input, absl::string_view debug_info_file, absl::string_view input_arrays, absl::string_view input_dtypes, diff --git a/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.h b/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.h index ce5337949c1..46e6376207c 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.h +++ b/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.h @@ -21,8 +21,8 @@ limitations under the License. #include "absl/strings/string_view.h" #include "absl/types/span.h" -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project namespace tensorflow { // TODO(antiagainst): Directly manipulating files in library functions is not @@ -54,6 +54,14 @@ mlir::OwningModuleRef SavedModelToMlirImport( absl::string_view saved_model_dir, const std::unordered_set& tags, absl::Span exported_names, mlir::MLIRContext* context); + +// Converts a TensorFlow V1 SavedModel stored in the directory with the given +// `saved_model_dir` into a MLIR module. Creates MLIR entities into the +// given MLIR `context`. +mlir::OwningModuleRef SavedModelV1ToMlirImport( + absl::string_view saved_model_dir, + const std::unordered_set& tags, mlir::MLIRContext* context); + } // namespace tensorflow #endif // TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSLATE_TF_MLIR_TRANSLATE_H_ diff --git a/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate_registration.cc b/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate_registration.cc index 08b09924fd1..db46fdcf931 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate_registration.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate_registration.cc @@ -21,8 +21,8 @@ limitations under the License. #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/Translation.h" // TF:local_config_mlir +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/Translation.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.h" #include "tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_flags.h" #include "tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.h" diff --git a/tensorflow/compiler/mlir/tensorflow/translate/translate_tf_dialect_op.cc b/tensorflow/compiler/mlir/tensorflow/translate/translate_tf_dialect_op.cc index 38d6a572584..a9b5021559c 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/translate_tf_dialect_op.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/translate_tf_dialect_op.cc @@ -14,11 +14,11 @@ limitations under the License. 
==============================================================================*/ #include "llvm/Support/ToolOutputFile.h" -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/Translation.h" // TF:local_config_mlir +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/Translation.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/translate/export_tf_dialect_op.h" namespace mlir { diff --git a/tensorflow/compiler/mlir/tensorflow/utils/bridge_logger.cc b/tensorflow/compiler/mlir/tensorflow/utils/bridge_logger.cc index a37e092aa56..7d449b8775f 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/bridge_logger.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/bridge_logger.cc @@ -17,26 +17,39 @@ limitations under the License. #include "llvm/ADT/StringRef.h" #include "llvm/Support/FormatVariadic.h" -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.h" namespace tensorflow { +BridgeLoggerConfig::BridgeLoggerConfig(bool print_module_scope, + bool print_after_only_on_change) + : mlir::PassManager::IRPrinterConfig(print_module_scope, + print_after_only_on_change) {} + // Logs op to file with name of format `mlir_bridge_{pass-name}_{file-suffix}.mlir`. -inline static void Log(mlir::Pass* pass, mlir::Operation* op, +inline static void Log(BridgeLoggerConfig::PrintCallbackFn print_callback, + mlir::Pass* pass, mlir::Operation* op, llvm::StringRef file_suffix) { - DumpMlirOpToFile( - llvm::formatv("mlir_bridge-{0}-{1}", pass->getName(), file_suffix).str(), - op); + std::string name = + llvm::formatv("mlir_bridge_{0}_{1}", pass->getName(), file_suffix).str(); + + std::unique_ptr os; + std::string filepath; + if (CreateFileForDumping(name, &os, &filepath).ok()) print_callback(*os); } -void BridgeLogger::runBeforePass(mlir::Pass* pass, mlir::Operation* op) { - Log(pass, op, "before"); +void BridgeLoggerConfig::printBeforeIfEnabled(mlir::Pass* pass, + mlir::Operation* operation, + PrintCallbackFn print_callback) { + Log(print_callback, pass, operation, "before"); } -void BridgeLogger::runAfterPass(mlir::Pass* pass, mlir::Operation* op) { - Log(pass, op, "after"); +void BridgeLoggerConfig::printAfterIfEnabled(mlir::Pass* pass, + mlir::Operation* operation, + PrintCallbackFn print_callback) { + Log(print_callback, pass, operation, "after"); } } // namespace tensorflow diff --git a/tensorflow/compiler/mlir/tensorflow/utils/bridge_logger.h b/tensorflow/compiler/mlir/tensorflow/utils/bridge_logger.h index 2943a37886a..4f6d49b77e9 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/bridge_logger.h +++ b/tensorflow/compiler/mlir/tensorflow/utils/bridge_logger.h @@ -16,18 +16,32 @@ limitations under the License.
#ifndef TENSORFLOW_COMPILER_MLIR_TENSORFLOW_UTILS_BRIDGE_LOGGER_H_ #define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_UTILS_BRIDGE_LOGGER_H_ -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassInstrumentation.h" // TF:local_config_mlir +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassManager.h" // TF:llvm-project namespace tensorflow { // Logger for logging/dumping MLIR modules before and after passes in bridge // targeting TPUs. -class BridgeLogger : public mlir::PassInstrumentation { +class BridgeLoggerConfig : public mlir::PassManager::IRPrinterConfig { public: - void runBeforePass(mlir::Pass* pass, mlir::Operation* op) override; - void runAfterPass(mlir::Pass* pass, mlir::Operation* op) override; + explicit BridgeLoggerConfig(bool print_module_scope = false, + bool print_after_only_on_change = true); + + // A hook that may be overridden by a derived config that checks if the IR + // of 'operation' should be dumped *before* the pass 'pass' has been + // executed. If the IR should be dumped, 'print_callback' should be invoked + // with the stream to dump into. + void printBeforeIfEnabled(mlir::Pass *pass, mlir::Operation *operation, + PrintCallbackFn print_callback) override; + + // A hook that may be overridden by a derived config that checks if the IR + // of 'operation' should be dumped *after* the pass 'pass' has been + // executed. If the IR should be dumped, 'print_callback' should be invoked + // with the stream to dump into. + void printAfterIfEnabled(mlir::Pass *pass, mlir::Operation *operation, + PrintCallbackFn print_callback) override; }; } // namespace tensorflow diff --git a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc index 4e914a5a20d..02ffae658cc 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc @@ -17,15 +17,15 @@ limitations under the License. 
#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/OpDefinition.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/Parser.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassManager.h" // TF:local_config_mlir -#include "mlir/Transforms/Passes.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/OpDefinition.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/Parser.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassManager.h" // TF:llvm-project +#include "mlir/Transforms/Passes.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.h" #include "tensorflow/compiler/mlir/tensorflow/utils/convert_type.h" @@ -216,8 +216,7 @@ Status ConvertMLIRToXlaComputation(mlir::ModuleOp module_op, // and canonicalization opportunities that are necessary for the second // LegalizeTFPass(allow_partial_conversion=false) invocation. tf2xla.addNestedPass(mlir::xla_hlo::createLegalizeTFPass(true)); - tf2xla.addPass(mlir::tf_executor::CreateTFExecutorGraphPruningPass( - /*skip_main_func=*/true)); + tf2xla.addPass(mlir::tf_executor::CreateTFExecutorGraphPruningPass()); tf2xla.addNestedPass(mlir::createCanonicalizerPass()); tf2xla.addNestedPass( mlir::xla_hlo::createLegalizeTFPass(false)); diff --git a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.h b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.h index a07927ce432..4a462898276 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.h +++ b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.h @@ -18,7 +18,7 @@ limitations under the License. #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" -#include "mlir/IR/Module.h" // TF:local_config_mlir +#include "mlir/IR/Module.h" // TF:llvm-project #include "tensorflow/compiler/tf2xla/xla_compiler.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/stream_executor/lib/statusor.h" diff --git a/tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.cc b/tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.cc index 1c1f9803bd7..fafd6cc11cb 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.cc @@ -23,10 +23,10 @@ limitations under the License. 
#include "llvm/ADT/APFloat.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" #include "tensorflow/compiler/mlir/tensorflow/utils/convert_type.h" #include "tensorflow/compiler/mlir/tensorflow/utils/mangling_util.h" diff --git a/tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.h b/tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.h index 7e982bb489b..b2646c265ad 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.h +++ b/tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.h @@ -18,8 +18,8 @@ limitations under the License. #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor.pb.h" #include "tensorflow/core/framework/tensor_shape.pb.h" diff --git a/tensorflow/compiler/mlir/tensorflow/utils/convert_tensor_test.cc b/tensorflow/compiler/mlir/tensorflow/utils/convert_tensor_test.cc index 69cda63e889..bcd37e39de9 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/convert_tensor_test.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/convert_tensor_test.cc @@ -15,9 +15,9 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.h" -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project #include "tensorflow/compiler/xla/test.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status_test_util.h" diff --git a/tensorflow/compiler/mlir/tensorflow/utils/convert_type.cc b/tensorflow/compiler/mlir/tensorflow/utils/convert_type.cc index e2d970c8dfd..7b0cbe6d5b5 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/convert_type.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/convert_type.cc @@ -17,9 +17,9 @@ limitations under the License. 
#include "absl/strings/str_cat.h" #include "llvm/Support/Casting.h" -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir -#include "mlir/Support/DebugStringHelper.h" // TF:local_config_mlir +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project +#include "mlir/Support/DebugStringHelper.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/framework/types.pb.h" diff --git a/tensorflow/compiler/mlir/tensorflow/utils/convert_type.h b/tensorflow/compiler/mlir/tensorflow/utils/convert_type.h index fa5c92c12fe..24c4273ad0e 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/convert_type.h +++ b/tensorflow/compiler/mlir/tensorflow/utils/convert_type.h @@ -16,8 +16,8 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_MLIR_TENSORFLOW_UTILS_CONVERT_TYPE_H_ #define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_UTILS_CONVERT_TYPE_H_ -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/framework/types.pb.h" diff --git a/tensorflow/compiler/mlir/tensorflow/utils/convert_type_test.cc b/tensorflow/compiler/mlir/tensorflow/utils/convert_type_test.cc index 423d61dc2c6..e7206096d2c 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/convert_type_test.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/convert_type_test.cc @@ -16,9 +16,9 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/utils/convert_type.h" #include "llvm/Support/raw_ostream.h" -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project #include "tensorflow/compiler/xla/test.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status_test_util.h" diff --git a/tensorflow/compiler/mlir/tensorflow/utils/device_util.cc b/tensorflow/compiler/mlir/tensorflow/utils/device_util.cc index 8309ab39feb..e983f3e9c0c 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/device_util.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/device_util.cc @@ -21,10 +21,10 @@ limitations under the License. 
#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/FormatVariadic.h" -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/device_set.h" #include "tensorflow/core/util/device_name_utils.h" diff --git a/tensorflow/compiler/mlir/tensorflow/utils/device_util.h b/tensorflow/compiler/mlir/tensorflow/utils/device_util.h index fa8a09801fa..73ae18d2487 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/device_util.h +++ b/tensorflow/compiler/mlir/tensorflow/utils/device_util.h @@ -17,8 +17,8 @@ limitations under the License. #define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_UTILS_DEVICE_UTIL_H_ #include "llvm/ADT/SmallVector.h" -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project #include "tensorflow/core/common_runtime/device_set.h" #include "tensorflow/core/util/device_name_utils.h" diff --git a/tensorflow/compiler/mlir/tensorflow/utils/device_util_test.cc b/tensorflow/compiler/mlir/tensorflow/utils/device_util_test.cc index a8d628b153a..cb25e000f7a 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/device_util_test.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/device_util_test.cc @@ -21,12 +21,12 @@ limitations under the License. #include #include "llvm/ADT/SmallVector.h" -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/device_set.h" #include "tensorflow/core/framework/device_attributes.pb.h" diff --git a/tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc b/tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc index 4b1d059bfa4..423e5012768 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc @@ -24,7 +24,7 @@ limitations under the License. 
#include "llvm/ADT/Twine.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" -#include "mlir/IR/Operation.h" // TF:local_config_mlir +#include "mlir/IR/Operation.h" // TF:llvm-project #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/logging.h" @@ -60,10 +60,43 @@ std::string MakeUniqueFilename(string name) { filename = llvm::Twine(filename).concat(".mlir").str(); return filename; } + +// Simple raw_ostream that prints to LOG(INFO). +struct LogInfoRawStream : public llvm::raw_ostream { + LogInfoRawStream() { SetUnbuffered(); } + ~LogInfoRawStream() override = default; + uint64_t current_pos() const override { return 0; } + + void write_impl(const char* ptr, size_t size) override { + LOG(INFO) << absl::string_view(ptr, size); + } +}; + +// Simple raw_ostream that prints to a file. +struct WritableFileRawStream : public llvm::raw_ostream { + explicit WritableFileRawStream(std::unique_ptr file) + : file(std::move(file)) { + SetUnbuffered(); + } + ~WritableFileRawStream() override = default; + uint64_t current_pos() const override { return 0; } + + void write_impl(const char* ptr, size_t size) override { + // Write the file if it is still valid. If the write fails, null out the + // file to avoid encountering another error. + if (file && !file->Append(StringPiece(ptr, size)).ok()) { + file = nullptr; + } + } + + // The file being written to. + std::unique_ptr file; +}; } // namespace -std::string DumpMlirOpToFile(llvm::StringRef name, mlir::Operation* op, - llvm::StringRef dirname) { +Status CreateFileForDumping(llvm::StringRef name, + std::unique_ptr* os, + std::string* filepath, llvm::StringRef dirname) { const char* dir = nullptr; if (!dirname.empty()) dir = dirname.data(); @@ -72,44 +105,49 @@ std::string DumpMlirOpToFile(llvm::StringRef name, mlir::Operation* op, if (!dir) { LOG(WARNING) - << "Failed to dump MLIR operation '" - << op->getName().getStringRef().str() << "' to '" << name.str() - << "' because dump location is not specified through either " + << "Failed to generate file because dump location is not specified " + "through either " "TF_DUMP_GRAPH_PREFIX environment variable or function argument."; - return "(TF_DUMP_GRAPH_PREFIX not specified)"; + return Status(error::Code::INVALID_ARGUMENT, + "(TF_DUMP_GRAPH_PREFIX not specified)"); } - std::string txt_op; - { - llvm::raw_string_ostream os(txt_op); - op->print(os, mlir::OpPrintingFlags().useLocalScope()); - os.flush(); - } - - Env* env = Env::Default(); - std::string filepath; if (std::strncmp(dir, "-", 2) == 0) { - LOG(INFO) << txt_op; - filepath = "LOG(INFO)"; - } else { - Status status = env->RecursivelyCreateDir(dir); - if (!status.ok()) { - LOG(WARNING) << "Failed to create '" << dir - << "' directory for dumping MLIR operation '" - << op->getName().getStringRef().str() << "': " << status; - return "(unavailable)"; - } - filepath = - llvm::Twine(dir).concat("/").concat(MakeUniqueFilename(name)).str(); - status = WriteStringToFile(env, filepath, txt_op); - if (!status.ok()) { - LOG(WARNING) << "Failed to dump MLIR operation '" - << op->getName().getStringRef().str() << "' to file '" - << filepath << "': " << status; - return "(unavailable)"; - } + *os = std::make_unique(); + *filepath = "LOG(INFO)"; + return Status(); } + // Get a valid file path to dump with. 
+ Env* env = Env::Default(); + Status status = env->RecursivelyCreateDir(dir); + if (!status.ok()) { + LOG(WARNING) << "Failed to create '" << dir + << "' directory for dumping: " << status; + return Status(error::Code::UNAVAILABLE, "(unavailable)"); + } + *filepath = + llvm::Twine(dir).concat("/").concat(MakeUniqueFilename(name)).str(); + + // Try to open the file and generate a raw_ostream. + std::unique_ptr<WritableFile> file; + status = env->NewWritableFile(*filepath, &file); + if (!status.ok()) { + LOG(WARNING) << "Failed to create file '" << *filepath << "': " << status; + return Status(error::Code::UNAVAILABLE, "(unavailable)"); + } + *os = std::make_unique<WritableFileRawStream>(std::move(file)); + return Status(); +} + +std::string DumpMlirOpToFile(llvm::StringRef name, mlir::Operation* op, + llvm::StringRef dirname) { + std::unique_ptr<llvm::raw_ostream> os; + std::string filepath; + Status result = CreateFileForDumping(name, &os, &filepath, dirname); + if (!result.ok()) return result.error_message(); + + op->print(*os, mlir::OpPrintingFlags().useLocalScope()); LOG(INFO) << "Dumped MLIR operation '" << op->getName().getStringRef().str() << "' to '" << filepath << "'"; return filepath; diff --git a/tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.h b/tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.h index 8ae6797a4f8..c2e4683c1c6 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.h +++ b/tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.h @@ -19,10 +19,26 @@ limitations under the License. #include <string> #include "llvm/ADT/StringRef.h" -#include "mlir/IR/Operation.h" // TF:local_config_mlir +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "tensorflow/core/platform/status.h" namespace tensorflow { +// Creates a file to use for dumping and returns success if a file could be +// created. The opened file is placed in 'os' and the path of the file used is +// placed in 'filepath'. +// +// If the TF_DUMP_GRAPH_PREFIX environment variable is "-", then the LOG(INFO) +// macro is used instead. +// +// This will create a file name via prefixing `name` with the value of the +// TF_DUMP_GRAPH_PREFIX environment variable if `dirname` is empty and +// suffixing `name` with ".mlir". +Status CreateFileForDumping(llvm::StringRef name, + std::unique_ptr<llvm::raw_ostream>* os, + std::string* filepath, + llvm::StringRef dirname = ""); + // Dumps MLIR operation to a file and returns the file name used. // // If the TF_DUMP_GRAPH_PREFIX environment variable is "-", then the MLIR diff --git a/tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util_test.cc b/tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util_test.cc index 59d8da91e7b..947a0ef0af3 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util_test.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util_test.cc @@ -16,9 +16,9 @@ limitations under the License.
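A hypothetical caller of the new CreateFileForDumping API (the function and dump name below are placeholders, not part of the patch):

```cpp
#include <memory>
#include <string>
#include "llvm/Support/raw_ostream.h"
#include "tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.h"
#include "tensorflow/core/platform/logging.h"

// Dump arbitrary text through the new API; when TF_DUMP_GRAPH_PREFIX is "-"
// the stream forwards to LOG(INFO) instead of a file.
void DumpExampleText() {
  std::unique_ptr<llvm::raw_ostream> os;
  std::string filepath;
  tensorflow::Status status =
      tensorflow::CreateFileForDumping("example_dump", &os, &filepath);
  if (!status.ok()) return;
  *os << "some debug text\n";
  LOG(INFO) << "Dumped to " << filepath;
}
```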
#include "tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.h" #include "llvm/Support/raw_ostream.h" -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/test.h" diff --git a/tensorflow/compiler/mlir/tensorflow/utils/error_util.h b/tensorflow/compiler/mlir/tensorflow/utils/error_util.h index a60d90cbfb7..7eb30ee2c46 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/error_util.h +++ b/tensorflow/compiler/mlir/tensorflow/utils/error_util.h @@ -18,9 +18,9 @@ limitations under the License. #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" -#include "mlir/IR/Diagnostics.h" // TF:local_config_mlir -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir +#include "mlir/IR/Diagnostics.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project #include "tensorflow/core/lib/core/status.h" // Error utilities for MLIR when interacting with code using Status returns. diff --git a/tensorflow/compiler/mlir/tensorflow/utils/error_util_test.cc b/tensorflow/compiler/mlir/tensorflow/utils/error_util_test.cc index 4e59cec86ab..3f4947bec23 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/error_util_test.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/error_util_test.cc @@ -16,8 +16,8 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/utils/error_util.h" #include "llvm/ADT/Twine.h" -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project #include "tensorflow/compiler/xla/test.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status_test_util.h" diff --git a/tensorflow/compiler/mlir/tensorflow/utils/eval_util.cc b/tensorflow/compiler/mlir/tensorflow/utils/eval_util.cc index e70ab3197d5..dae0a6cf515 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/eval_util.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/eval_util.cc @@ -20,10 +20,10 @@ limitations under the License. 
#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project #include "tensorflow/c/eager/c_api_internal.h" #include "tensorflow/compiler/mlir/tensorflow/translate/export_tf_dialect_op.h" #include "tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.h" diff --git a/tensorflow/compiler/mlir/tensorflow/utils/eval_util.h b/tensorflow/compiler/mlir/tensorflow/utils/eval_util.h index 657ea688b93..39fd91afe40 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/eval_util.h +++ b/tensorflow/compiler/mlir/tensorflow/utils/eval_util.h @@ -19,7 +19,7 @@ limitations under the License. #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "mlir/IR/Operation.h" // TF:local_config_mlir +#include "mlir/IR/Operation.h" // TF:llvm-project #include "tensorflow/c/eager/c_api.h" namespace tensorflow { diff --git a/tensorflow/compiler/mlir/tensorflow/utils/export_utils.cc b/tensorflow/compiler/mlir/tensorflow/utils/export_utils.cc index e35b7130de8..d2f17586ad3 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/export_utils.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/export_utils.cc @@ -23,17 +23,17 @@ limitations under the License. #include "absl/strings/string_view.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Casting.h" -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/Identifier.h" // TF:local_config_mlir -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/OperationSupport.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/TypeUtilities.h" // TF:local_config_mlir -#include "mlir/Support/DebugStringHelper.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/Identifier.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/OperationSupport.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/TypeUtilities.h" // TF:llvm-project +#include "mlir/Support/DebugStringHelper.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" #include "tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.h" @@ -65,8 +65,12 @@ Status ConvertLocation(mlir::Location inst_loc, debug_info->add_original_node_names(name_loc.getName().c_str()); } } else if (auto fused = inst_loc.dyn_cast()) { - for (auto loc : fused.getLocations()) { - TF_RETURN_IF_ERROR(ConvertLocation(loc, debug_info)); + auto locations = fused.getLocations(); + if 
(locations.size() <= 1) + return errors::InvalidArgument("expected experimental debug info."); + // Skip the last one, which is the name of the node_def. + for (int i = 0; i < locations.size() - 1; ++i) { + TF_RETURN_IF_ERROR(ConvertLocation(locations[i], debug_info)); } } return Status::OK(); @@ -218,12 +222,12 @@ static bool IsRefTypeControlOp(mlir::Operation* op) { auto op_name = op_name_or_status.ConsumeValueOrDie(); if (op_name.equals("NextIteration")) - return mlir::getElementTypeOrSelf(op->getOperand(0)->getType()) + return mlir::getElementTypeOrSelf(op->getOperand(0).getType()) .isa<mlir::TF::TensorFlowRefType>(); if (op_name.equals("Enter") || op_name.equals("Exit") || op_name.equals("Switch") || op_name.equals("Merge")) { - return getElementTypeOrSelf(op->getResult(0)->getType()) + return getElementTypeOrSelf(op->getResult(0).getType()) .isa<mlir::TF::TensorFlowRefType>(); } return false; } diff --git a/tensorflow/compiler/mlir/tensorflow/utils/export_utils.h b/tensorflow/compiler/mlir/tensorflow/utils/export_utils.h index df176762c07..a8c91c0b494 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/export_utils.h +++ b/tensorflow/compiler/mlir/tensorflow/utils/export_utils.h @@ -23,10 +23,10 @@ limitations under the License. #include "absl/container/flat_hash_set.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project #include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/function.h" #include "tensorflow/core/framework/node_def.pb.h" diff --git a/tensorflow/compiler/mlir/tf_mlir_opt_main.cc b/tensorflow/compiler/mlir/tf_mlir_opt_main.cc index f70868e217f..736e954278e 100644 --- a/tensorflow/compiler/mlir/tf_mlir_opt_main.cc +++ b/tensorflow/compiler/mlir/tf_mlir_opt_main.cc @@ -17,10 +17,10 @@ limitations under the License. #include "llvm/Support/InitLLVM.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/ToolOutputFile.h" -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassManager.h" // TF:local_config_mlir -#include "mlir/Support/FileUtilities.h" // TF:local_config_mlir -#include "mlir/Support/MlirOptMain.h" // TF:local_config_mlir +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassManager.h" // TF:llvm-project +#include "mlir/Support/FileUtilities.h" // TF:llvm-project +#include "mlir/Support/MlirOptMain.h" // TF:llvm-project #include "tensorflow/compiler/mlir/init_mlir.h" #include "tensorflow/core/platform/init_main.h" #include "tensorflow/core/platform/logging.h" diff --git a/tensorflow/compiler/mlir/tf_mlir_translate_main.cc b/tensorflow/compiler/mlir/tf_mlir_translate_main.cc index 9ab31265a33..f5fc56556ec 100644 --- a/tensorflow/compiler/mlir/tf_mlir_translate_main.cc +++ b/tensorflow/compiler/mlir/tf_mlir_translate_main.cc @@ -21,11 +21,11 @@ limitations under the License.
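For reference, ConvertLocation above assumes a FusedLoc laid out with the original node names first and the node_def's own NameLoc last, which is why the loop stops one short of the end. A sketch of building such a location, with hypothetical node names:

```cpp
#include "mlir/IR/Builders.h"     // TF:llvm-project
#include "mlir/IR/Location.h"     // TF:llvm-project
#include "mlir/IR/MLIRContext.h"  // TF:llvm-project

// Original node names first, the node_def's own name last.
mlir::Location MakeDebugLocation(mlir::MLIRContext* context) {
  mlir::Builder b(context);
  mlir::Location original =
      mlir::NameLoc::get(b.getIdentifier("original_node"), context);
  mlir::Location node_name =
      mlir::NameLoc::get(b.getIdentifier("node_def_name"), context);
  return mlir::FusedLoc::get({original, node_name}, context);
}
```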
#include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/ToolOutputFile.h" -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/Support/FileUtilities.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir -#include "mlir/Support/ToolUtilities.h" // TF:local_config_mlir -#include "mlir/Support/TranslateClParser.h" // TF:local_config_mlir +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/Support/FileUtilities.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project +#include "mlir/Support/ToolUtilities.h" // TF:llvm-project +#include "mlir/Support/TranslateClParser.h" // TF:llvm-project #include "tensorflow/compiler/mlir/init_mlir.h" #include "tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.h" #include "tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate_cl.h" @@ -54,6 +54,12 @@ static llvm::cl::opt import_saved_model( llvm::cl::desc("Import a saved model to its MLIR representation"), llvm::cl::value_desc("dir")); +// NOLINTNEXTLINE +static llvm::cl::opt import_saved_model_v1( + "savedmodel-v1-to-mlir", + llvm::cl::desc("Import a saved model V1 to its MLIR representation"), + llvm::cl::value_desc("dir")); + // NOLINTNEXTLINE static llvm::cl::opt saved_model_tags( "tf-savedmodel-tags", @@ -77,10 +83,11 @@ int main(int argc, char** argv) { llvm::cl::ParseCommandLineOptions(argc, argv, "TF MLIR translation driver\n"); - if (!import_saved_model && !requested_translation) { + if (!import_saved_model && !import_saved_model_v1 && !requested_translation) { llvm::errs() << "error: need to specify one translation to perform\n"; return 1; - } else if (import_saved_model && requested_translation) { + } else if (import_saved_model && import_saved_model_v1 && + requested_translation) { llvm::errs() << "error: cannot specify more than one translation to perform\n"; return 1; @@ -105,6 +112,16 @@ int main(int argc, char** argv) { &context); if (!module) return 1; + module->print(output->os()); + } else if (import_saved_model_v1) { + std::unordered_set tags = + absl::StrSplit(saved_model_tags, ','); + mlir::MLIRContext context; + + auto module = + tensorflow::SavedModelV1ToMlirImport(input_filename, tags, &context); + if (!module) return 1; + module->print(output->os()); } else { auto input = mlir::openInputFile(input_filename, &error_message); diff --git a/tensorflow/compiler/mlir/xla/BUILD b/tensorflow/compiler/mlir/xla/BUILD index 17629440871..451f37211e8 100644 --- a/tensorflow/compiler/mlir/xla/BUILD +++ b/tensorflow/compiler/mlir/xla/BUILD @@ -1,4 +1,4 @@ -load("@local_config_mlir//:tblgen.bzl", "gentbl") +load("//third_party/mlir:tblgen.bzl", "gentbl") load("//tensorflow:tensorflow.bzl", "tf_cc_test", "tf_native_cc_binary") package( @@ -8,7 +8,7 @@ package( package_group( name = "friends", - includes = ["@local_config_mlir//:subpackages"], + includes = ["//third_party/mlir:subpackages"], packages = [ "//babelfish/device/...", "//learning/brain/experimental/mlir/...", @@ -32,7 +32,7 @@ filegroup( "ir/hlo_ops_base.td", "ir/hlo_utils.td", "ir/lhlo_ops.td", - "@local_config_mlir//:OpBaseTdFiles", + "@llvm-project//mlir:OpBaseTdFiles", ], ) @@ -44,7 +44,7 @@ gentbl( ("-gen-struct-attr-decls", "ir/hlo_structs.h.inc"), ("-gen-struct-attr-defs", "ir/hlo_structs.cc.inc"), ], - tblgen = "@local_config_mlir//:mlir-tblgen", + tblgen = "@llvm-project//mlir:mlir-tblgen", td_file = "ir/hlo_ops.td", td_includes = ["ir/hlo_utils.td"], td_srcs = 
[":hlo_ops_td_files"], @@ -56,7 +56,7 @@ gentbl( ("-gen-op-decls", "ir/hlo_ops_base.h.inc"), ("-gen-op-defs", "ir/hlo_ops_base.cc.inc"), ], - tblgen = "@local_config_mlir//:mlir-tblgen", + tblgen = "@llvm-project//mlir:mlir-tblgen", td_file = "ir/hlo_ops_base.td", td_srcs = [":hlo_ops_td_files"], ) @@ -67,7 +67,7 @@ gentbl( ("-gen-op-decls", "ir/lhlo_ops.h.inc"), ("-gen-op-defs", "ir/lhlo_ops.cc.inc"), ], - tblgen = "@local_config_mlir//:mlir-tblgen", + tblgen = "@llvm-project//mlir:mlir-tblgen", td_file = "ir/lhlo_ops.td", td_srcs = [":hlo_ops_td_files"], ) @@ -77,12 +77,12 @@ gentbl( tbl_outs = [ ("-gen-rewriters", "transforms/generated_legalize_tf.inc"), ], - tblgen = "@local_config_mlir//:mlir-tblgen", + tblgen = "@llvm-project//mlir:mlir-tblgen", td_file = "transforms/legalize_tf_patterns.td", td_srcs = [ ":hlo_ops_td_files", - "@llvm//:support", - "@local_config_mlir//:StdOpsTdFiles", + "@llvm-project//llvm:support", + "@llvm-project//mlir:StdOpsTdFiles", "//tensorflow/compiler/mlir/tensorflow:tensorflow_ops_td_files", ], ) @@ -95,7 +95,7 @@ gentbl( "transforms/generated_canonicalize.inc", ), ], - tblgen = "@local_config_mlir//:mlir-tblgen", + tblgen = "@llvm-project//mlir:mlir-tblgen", td_file = "transforms/canonicalize.td", td_srcs = [ ":hlo_ops_td_files", @@ -114,15 +114,16 @@ cc_library( ":hlo", "//tensorflow/compiler/mlir/tensorflow", "//tensorflow/compiler/mlir/tensorflow:lower_tf_lib", + "//tensorflow/compiler/xla/client:padding", "//tensorflow/core:framework", "//tensorflow/core/kernels:conv_grad_shape_utils", - "@llvm//:support", - "@local_config_mlir//:Analysis", - "@local_config_mlir//:IR", - "@local_config_mlir//:Pass", - "@local_config_mlir//:StandardOps", - "@local_config_mlir//:Support", - "@local_config_mlir//:Transforms", + "@llvm-project//llvm:support", + "@llvm-project//mlir:Analysis", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Support", + "@llvm-project//mlir:Transforms", ], alwayslink = 1, ) @@ -135,11 +136,11 @@ cc_library( ":lhlo", "//tensorflow/compiler/xla:status", "@com_google_absl//absl/memory", - "@llvm//:support", - "@local_config_mlir//:AffineOps", - "@local_config_mlir//:IR", - "@local_config_mlir//:Pass", - "@local_config_mlir//:StandardOps", + "@llvm-project//llvm:support", + "@llvm-project//mlir:AffineOps", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:StandardOps", ], alwayslink = 1, ) @@ -151,13 +152,13 @@ cc_library( deps = [ ":lhlo", "@com_google_absl//absl/memory", - "@llvm//:support", - "@local_config_mlir//:IR", - "@local_config_mlir//:Linalg", - "@local_config_mlir//:LinalgDialectRegistration", - "@local_config_mlir//:Pass", - "@local_config_mlir//:StandardOps", - "@local_config_mlir//:Transforms", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Linalg", + "@llvm-project//mlir:LinalgDialectRegistration", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Transforms", ], alwayslink = 1, ) @@ -169,14 +170,14 @@ cc_library( deps = [ ":lhlo", "@com_google_absl//absl/memory", - "@llvm//:support", - "@local_config_mlir//:GPUDialect", - "@local_config_mlir//:IR", - "@local_config_mlir//:Linalg", - "@local_config_mlir//:LoopOps", - "@local_config_mlir//:Pass", - "@local_config_mlir//:StandardOps", - "@local_config_mlir//:Transforms", + "@llvm-project//llvm:support", + "@llvm-project//mlir:GPUDialect", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Linalg", + 
"@llvm-project//mlir:LoopOps", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Transforms", ], alwayslink = 1, ) @@ -187,9 +188,9 @@ cc_library( deps = [ ":lhlo", "@com_google_absl//absl/memory", - "@local_config_mlir//:Linalg", - "@local_config_mlir//:LinalgDialectRegistration", - "@local_config_mlir//:Pass", + "@llvm-project//mlir:Linalg", + "@llvm-project//mlir:LinalgDialectRegistration", + "@llvm-project//mlir:Pass", ], alwayslink = 1, ) @@ -202,10 +203,10 @@ cc_library( ":lhlo", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", - "@local_config_mlir//:IR", - "@local_config_mlir//:Pass", - "@local_config_mlir//:StandardOps", - "@local_config_mlir//:Transforms", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Transforms", ], alwayslink = 1, ) @@ -215,11 +216,11 @@ gentbl( tbl_outs = [ ("-gen-rewriters", "transforms/generated_legalize_to_standard.inc"), ], - tblgen = "@local_config_mlir//:mlir-tblgen", + tblgen = "@llvm-project//mlir:mlir-tblgen", td_file = "transforms/legalize_to_standard_patterns.td", td_srcs = [ ":hlo_ops_td_files", - "@local_config_mlir//:StdOpsTdFiles", + "@llvm-project//mlir:StdOpsTdFiles", ], ) @@ -230,12 +231,12 @@ cc_library( ], deps = [ ":hlo", - "@llvm//:support", - "@local_config_mlir//:Analysis", - "@local_config_mlir//:IR", - "@local_config_mlir//:Pass", - "@local_config_mlir//:StandardOps", - "@local_config_mlir//:Support", + "@llvm-project//llvm:support", + "@llvm-project//mlir:Analysis", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Support", ], alwayslink = 1, ) @@ -246,11 +247,11 @@ cc_library( deps = [ ":hlo", ":xla_legalize_to_standard_inc_gen", - "@llvm//:support", - "@local_config_mlir//:Analysis", - "@local_config_mlir//:IR", - "@local_config_mlir//:Pass", - "@local_config_mlir//:StandardOps", + "@llvm-project//llvm:support", + "@llvm-project//mlir:Analysis", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:StandardOps", ], alwayslink = 1, ) @@ -260,12 +261,12 @@ gentbl( tbl_outs = [ ("-gen-rewriters", "transforms/generated_lower_complex.inc"), ], - tblgen = "@local_config_mlir//:mlir-tblgen", + tblgen = "@llvm-project//mlir:mlir-tblgen", td_file = "transforms/lower_complex_patterns.td", td_srcs = [ ":hlo_ops_td_files", - "@llvm//:support", - "@local_config_mlir//:StdOpsTdFiles", + "@llvm-project//llvm:support", + "@llvm-project//mlir:StdOpsTdFiles", ], ) @@ -279,13 +280,13 @@ cc_library( deps = [ ":hlo", ":xla_dialect_registration", - "@llvm//:support", - "@local_config_mlir//:Analysis", - "@local_config_mlir//:IR", - "@local_config_mlir//:Pass", - "@local_config_mlir//:StandardOps", - "@local_config_mlir//:Support", - "@local_config_mlir//:Transforms", + "@llvm-project//llvm:support", + "@llvm-project//mlir:Analysis", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Support", + "@llvm-project//mlir:Transforms", ], alwayslink = 1, ) @@ -310,13 +311,13 @@ cc_library( ":hlo_ops_base_inc_gen", ":hlo_ops_inc_gen", ":xla_canonicalize_inc_gen", - "@llvm//:support", - "@local_config_mlir//:Analysis", - "@local_config_mlir//:IR", - "@local_config_mlir//:Pass", - "@local_config_mlir//:StandardOps", - "@local_config_mlir//:Support", - "@local_config_mlir//:TransformUtils", + "@llvm-project//llvm:support", + "@llvm-project//mlir:Analysis", + 
"@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Support", + "@llvm-project//mlir:TransformUtils", ], alwayslink = 1, ) @@ -337,13 +338,13 @@ cc_library( deps = [ ":hlo_ops_base_inc_gen", ":lhlo_ops_inc_gen", - "@llvm//:support", - "@local_config_mlir//:Analysis", - "@local_config_mlir//:IR", - "@local_config_mlir//:Pass", - "@local_config_mlir//:StandardOps", - "@local_config_mlir//:Support", - "@local_config_mlir//:TransformUtils", + "@llvm-project//llvm:support", + "@llvm-project//mlir:Analysis", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Support", + "@llvm-project//mlir:TransformUtils", ], alwayslink = 1, ) @@ -357,7 +358,7 @@ cc_library( ":convert_op_folder", "//tensorflow/compiler/xla:literal", "//tensorflow/compiler/xla/service:hlo", - "@local_config_mlir//:IR", + "@llvm-project//mlir:IR", ], alwayslink = 1, ) @@ -369,7 +370,7 @@ cc_library( deps = [ ":hlo", ":lhlo", - "@local_config_mlir//:IR", + "@llvm-project//mlir:IR", ], alwayslink = 1, ) @@ -388,9 +389,9 @@ cc_library( "//tensorflow/core:framework", "//tensorflow/core/platform:logging", "//tensorflow/core/platform:types", - "@llvm//:support", - "@local_config_mlir//:IR", - "@local_config_mlir//:Support", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Support", ], ) @@ -405,7 +406,7 @@ tf_cc_test( "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core:test_main", - "@local_config_mlir//:IR", + "@llvm-project//mlir:IR", ], ) @@ -430,13 +431,13 @@ cc_library( "//tensorflow/compiler/xla/client/lib:slicing", "//tensorflow/compiler/xla/service:hlo", "//tensorflow/stream_executor/lib", - "@llvm//:support", - "@local_config_mlir//:Analysis", - "@local_config_mlir//:IR", - "@local_config_mlir//:Pass", - "@local_config_mlir//:StandardOps", - "@local_config_mlir//:TransformUtils", - "@local_config_mlir//:Transforms", + "@llvm-project//llvm:support", + "@llvm-project//mlir:Analysis", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:TransformUtils", + "@llvm-project//mlir:Transforms", ], ) @@ -474,9 +475,9 @@ cc_library( "//tensorflow/compiler/xla:xla_proto_cc", "//tensorflow/compiler/xla/service:hlo", "//tensorflow/core:lib", - "@llvm//:support", - "@local_config_mlir//:IR", - "@local_config_mlir//:StandardOps", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:StandardOps", ], ) @@ -493,9 +494,9 @@ cc_library( "//tensorflow/compiler/xla/service:hlo_proto_cc", "//tensorflow/core:lib", "@com_google_protobuf//:protobuf_headers", - "@llvm//:support", - "@local_config_mlir//:IR", - "@local_config_mlir//:Translation", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Translation", ], alwayslink = 1, ) @@ -504,24 +505,24 @@ tf_native_cc_binary( name = "operator_writer_gen", srcs = ["operator_writer_gen.cc"], deps = [ - "@llvm//:support", - "@llvm//:tablegen", - "@local_config_mlir//:Support", - "@local_config_mlir//:TableGen", + "@llvm-project//llvm:support", + "@llvm-project//llvm:tablegen", + "@llvm-project//mlir:Support", + "@llvm-project//mlir:TableGen", ], ) genrule( name = "operator_writer_inc", srcs = [ - "@local_config_mlir//:include/mlir/IR/OpBase.td", + "@llvm-project//mlir:include/mlir/IR/OpBase.td", ":ir/hlo_ops.td", ":ir/hlo_ops_base.td", ":ir/hlo_utils.td", ], outs = ["operator_writers.inc"], cmd 
= ("$(location :operator_writer_gen) " + - "-I external/local_config_mlir/include " + + "-I external/llvm-project/mlir/include " + "$(location //tensorflow/compiler/mlir/xla:ir/hlo_ops.td) " + " -o $@"), tools = [":operator_writer_gen"], @@ -532,6 +533,6 @@ cc_library( srcs = ["convert_op_folder.cc"], hdrs = ["convert_op_folder.h"], deps = [ - "@local_config_mlir//:IR", + "@llvm-project//mlir:IR", ], ) diff --git a/tensorflow/compiler/mlir/xla/convert_op_folder.cc b/tensorflow/compiler/mlir/xla/convert_op_folder.cc index 8245b4a0585..dfd7cb39bf9 100644 --- a/tensorflow/compiler/mlir/xla/convert_op_folder.cc +++ b/tensorflow/compiler/mlir/xla/convert_op_folder.cc @@ -17,9 +17,9 @@ limitations under the License. #include "tensorflow/compiler/mlir/xla/convert_op_folder.h" -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/TypeUtilities.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/TypeUtilities.h" // TF:llvm-project namespace mlir { namespace xla { diff --git a/tensorflow/compiler/mlir/xla/convert_op_folder.h b/tensorflow/compiler/mlir/xla/convert_op_folder.h index 63ac0e61df5..37a4db0227f 100644 --- a/tensorflow/compiler/mlir/xla/convert_op_folder.h +++ b/tensorflow/compiler/mlir/xla/convert_op_folder.h @@ -16,8 +16,8 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_MLIR_XLA_CONVERT_OP_FOLDER_H_ #define TENSORFLOW_COMPILER_MLIR_XLA_CONVERT_OP_FOLDER_H_ -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project namespace mlir { namespace xla { diff --git a/tensorflow/compiler/mlir/xla/hlo_function_importer.cc b/tensorflow/compiler/mlir/xla/hlo_function_importer.cc index fe468e26ff6..5300824aabc 100644 --- a/tensorflow/compiler/mlir/xla/hlo_function_importer.cc +++ b/tensorflow/compiler/mlir/xla/hlo_function_importer.cc @@ -19,14 +19,14 @@ limitations under the License. 
#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/raw_ostream.h" -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/BlockAndValueMapping.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Identifier.h" // TF:local_config_mlir -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/Region.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/BlockAndValueMapping.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Identifier.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/Region.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/utils/error_util.h" #include "tensorflow/compiler/mlir/xla/ir/hlo_ops.h" #include "tensorflow/compiler/xla/protobuf_util.h" @@ -264,6 +264,12 @@ StatusOr HloFunctionImporter::ImportInstruction( attributes.push_back(ConvertComparisonDirection(instruction)); MakeAndReturn(CompareOp); } + case HloOpcode::kCholesky: { + attributes.push_back(builder_->getNamedAttr( + "lower", + builder_->getBoolAttr(instruction->cholesky_options().lower()))); + MakeAndReturn(CholeskyOp); + } case HloOpcode::kGather: { auto gather_instruction = static_cast(instruction); attributes.push_back(ConvertGatherDimensionNumbers( @@ -284,9 +290,21 @@ StatusOr HloFunctionImporter::ImportInstruction( return func_builder ->create( loc, result_type, operands[0], operands[1], - llvm::ArrayRef(operands.begin() + 2, operands.end())) + llvm::ArrayRef(operands.begin() + 2, operands.end())) .getOperation(); } + case HloOpcode::kInfeed: { + attributes.push_back(builder_->getNamedAttr( + "infeed_config", mlir::StringAttr::get(instruction->infeed_config(), + builder_->getContext()))); + MakeAndReturn(InfeedOp); + } + case HloOpcode::kOutfeed: { + attributes.push_back(builder_->getNamedAttr( + "outfeed_config", mlir::StringAttr::get(instruction->outfeed_config(), + builder_->getContext()))); + MakeAndReturn(OutfeedOp); + } case HloOpcode::kPad: { const auto& padding_config = instruction->padding_config(); llvm::SmallVector edge_padding_low; @@ -309,6 +327,12 @@ StatusOr HloFunctionImporter::ImportInstruction( Convert(interior_padding)) .getOperation(); } + case HloOpcode::kSetDimensionSize: { + attributes.push_back(builder_->getNamedAttr( + "dimension", builder_->getIntegerAttr(builder_->getIntegerType(32), + instruction->dimension()))); + MakeAndReturn(SetDimensionSizeOp); + } case HloOpcode::kSlice: { return func_builder ->create( @@ -359,9 +383,31 @@ StatusOr HloFunctionImporter::ImportInstruction( ConvertDimensions(instruction->dimensions())) .getOperation(); } + case HloOpcode::kRng: { + auto shape = func_builder->create( + loc, Convert(result_type.cast().getShape())); + switch (instruction->random_distribution()) { + case xla::RNG_UNIFORM: + return func_builder + ->create( + loc, result_type, operands[0], operands[1], shape) + .getOperation(); + + case xla::RNG_NORMAL: + return func_builder + ->create( + loc, result_type, operands[0], operands[1], shape) + .getOperation(); + + default: + return tensorflow::errors::InvalidArgument(absl::StrCat( + "Unsupported distribution: ", + 
RandomDistributionToString(instruction->random_distribution()))); + } + } case HloOpcode::kWhile: { auto op = func_builder->create<mlir::xla_hlo::WhileOp>( - loc, operands[0]->getType(), operands[0]); + loc, operands[0].getType(), operands[0]); TF_RETURN_IF_ERROR( ImportComputation(instruction->while_condition(), &op.cond())); TF_RETURN_IF_ERROR( @@ -461,10 +507,12 @@ StatusOr<mlir::Operation*> HloFunctionImporter::ImportInstruction( NoAttributeCase(kPower, PowOp); NoAttributeCase(kReal, RealOp); NoAttributeCase(kRemainder, RemOp); + NoAttributeCase(kReplicaId, ReplicaIdOp); // The dimensions attribute is not present on the HLO Reshape instruction. // If dimensions are non-default, the XLA builder implements it as a // separate transpose. NoAttributeCase(kReshape, ReshapeOp); + NoAttributeCase(kRoundNearestAfz, RoundOp); NoAttributeCase(kRsqrt, RsqrtOp); NoAttributeCase(kSelect, SelectOp); NoAttributeCase(kShiftLeft, ShiftLeftOp); @@ -500,9 +548,9 @@ StatusOr<mlir::Operation*> HloFunctionImporter::ImportInstruction( } } -StatusOr<llvm::SmallVector<mlir::Value*, 4>> HloFunctionImporter::GetOperands( +StatusOr<llvm::SmallVector<mlir::Value, 4>> HloFunctionImporter::GetOperands( HloInstruction* instruction) { - llvm::SmallVector<mlir::Value*, 4> operands; + llvm::SmallVector<mlir::Value, 4> operands; for (const auto& operand : instruction->operands()) { auto input_it = instruction_value_map_.find(operand); if (input_it == instruction_value_map_.end()) { @@ -590,8 +638,7 @@ tensorflow::Status HloFunctionImporter::GetMlirTypes( return tensorflow::Status::OK(); } -StatusOr<mlir::Value*> HloFunctionImporter::GetMlirValue( - HloInstruction* instruction) { +StatusOr<mlir::Value> HloFunctionImporter::GetMlirValue(HloInstruction* instruction) { auto lookup = instruction_value_map_.find(instruction); if (lookup != instruction_value_map_.end()) { return lookup->second; diff --git a/tensorflow/compiler/mlir/xla/hlo_function_importer.h b/tensorflow/compiler/mlir/xla/hlo_function_importer.h index bd36c9b2b54..9085e23ffd8 100644 --- a/tensorflow/compiler/mlir/xla/hlo_function_importer.h +++ b/tensorflow/compiler/mlir/xla/hlo_function_importer.h @@ -18,12 +18,12 @@ limitations under the License. #include <unordered_map> -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/utils/error_util.h" #include "tensorflow/compiler/mlir/xla/ir/hlo_ops.h" #include "tensorflow/compiler/xla/status.h" @@ -71,7 +71,7 @@ class HloFunctionImporter { mlir::OpBuilder* func_builder); // Gets the MLIR operand values from an HLO Instruction. - StatusOr<llvm::SmallVector<mlir::Value*, 4>> GetOperands( + StatusOr<llvm::SmallVector<mlir::Value, 4>> GetOperands( xla::HloInstruction* instruction); // Converts xla Tensor type to the corresponding MLIR type. @@ -89,7 +89,7 @@ class HloFunctionImporter { llvm::SmallVectorImpl<mlir::Type>* types); // Returns the Mlir Value for the corresponding HloInstruction. - StatusOr<mlir::Value*> GetMlirValue(xla::HloInstruction* instruction); + StatusOr<mlir::Value> GetMlirValue(xla::HloInstruction* instruction); // Converts an XLA PrecisionConfig to the corresponding MLIR attribute.
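The pervasive '->' to '.' changes in these files track MLIR's switch from pointer-typed mlir::Value* to a lightweight value type that is passed and copied by value. A minimal illustration:

```cpp
#include "mlir/IR/Operation.h"  // TF:llvm-project
#include "mlir/IR/Types.h"      // TF:llvm-project
#include "mlir/IR/Value.h"      // TF:llvm-project

// mlir::Value is now a value type: copy it freely, use '.' instead of '->'.
mlir::Type FirstOperandType(mlir::Operation* op) {
  mlir::Value v = op->getOperand(0);  // previously mlir::Value*
  return v.getType();                 // previously v->getType()
}
```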
mlir::NamedAttribute ConvertPrecisionConfig(xla::HloInstruction* instruction); @@ -129,7 +129,7 @@ class HloFunctionImporter { std::unordered_map<HloComputation*, mlir::FuncOp>* function_map_; // Mapping from HloInstructions to the associative MLIR values. - std::unordered_map<HloInstruction*, mlir::Value*> instruction_value_map_; + std::unordered_map<HloInstruction*, mlir::Value> instruction_value_map_; }; } // namespace xla diff --git a/tensorflow/compiler/mlir/xla/hlo_module_importer.cc b/tensorflow/compiler/mlir/xla/hlo_module_importer.cc index 60a2b93d907..f8eabeb046d 100644 --- a/tensorflow/compiler/mlir/xla/hlo_module_importer.cc +++ b/tensorflow/compiler/mlir/xla/hlo_module_importer.cc @@ -15,12 +15,12 @@ limitations under the License. #include "tensorflow/compiler/mlir/xla/hlo_module_importer.h" -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/OperationSupport.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/OperationSupport.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project #include "tensorflow/compiler/mlir/xla/hlo_function_importer.h" #include "tensorflow/compiler/mlir/xla/ir/hlo_ops.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" diff --git a/tensorflow/compiler/mlir/xla/hlo_module_importer.h b/tensorflow/compiler/mlir/xla/hlo_module_importer.h index 5e8005f9489..c3e8c04cdcd 100644 --- a/tensorflow/compiler/mlir/xla/hlo_module_importer.h +++ b/tensorflow/compiler/mlir/xla/hlo_module_importer.h @@ -18,10 +18,10 @@ limitations under the License. #include <unordered_map> -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/utils/error_util.h" #include "tensorflow/compiler/mlir/xla/ir/hlo_ops.h" #include "tensorflow/compiler/xla/status.h" diff --git a/tensorflow/compiler/mlir/xla/hlo_utils.cc b/tensorflow/compiler/mlir/xla/hlo_utils.cc index 7fa9dd71345..bfa57d97336 100644 --- a/tensorflow/compiler/mlir/xla/hlo_utils.cc +++ b/tensorflow/compiler/mlir/xla/hlo_utils.cc @@ -17,9 +17,9 @@ limitations under the License. #include "tensorflow/compiler/mlir/xla/hlo_utils.h" -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/TypeUtilities.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/TypeUtilities.h" // TF:llvm-project #include "tensorflow/compiler/xla/literal.h" namespace xla { diff --git a/tensorflow/compiler/mlir/xla/hlo_utils.h b/tensorflow/compiler/mlir/xla/hlo_utils.h index b267b39ce5a..74bd4391395 100644 --- a/tensorflow/compiler/mlir/xla/hlo_utils.h +++ b/tensorflow/compiler/mlir/xla/hlo_utils.h @@ -18,9 +18,9 @@ limitations under the License.
#ifndef TENSORFLOW_COMPILER_MLIR_XLA_HLO_UTILS_H_ #define TENSORFLOW_COMPILER_MLIR_XLA_HLO_UTILS_H_ -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project #include "tensorflow/compiler/mlir/xla/convert_op_folder.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" diff --git a/tensorflow/compiler/mlir/xla/ir/hlo_ops.cc b/tensorflow/compiler/mlir/xla/ir/hlo_ops.cc index 41e561fd731..be0cd1bdc53 100644 --- a/tensorflow/compiler/mlir/xla/ir/hlo_ops.cc +++ b/tensorflow/compiler/mlir/xla/ir/hlo_ops.cc @@ -29,22 +29,22 @@ limitations under the License. #include "llvm/ADT/StringRef.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MathExtras.h" -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Dialect.h" // TF:local_config_mlir -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/OpDefinition.h" // TF:local_config_mlir -#include "mlir/IR/OpImplementation.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/OperationSupport.h" // TF:local_config_mlir -#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/TypeUtilities.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir -#include "mlir/IR/Value.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir -#include "mlir/Transforms/InliningUtils.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Dialect.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/OpDefinition.h" // TF:llvm-project +#include "mlir/IR/OpImplementation.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/OperationSupport.h" // TF:llvm-project +#include "mlir/IR/PatternMatch.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/TypeUtilities.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project +#include "mlir/IR/Value.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project +#include "mlir/Transforms/InliningUtils.h" // TF:llvm-project #include "tensorflow/compiler/mlir/xla/convert_op_folder.h" #include "tensorflow/compiler/mlir/xla/ir/hlo_ops.h.inc" #include "tensorflow/compiler/mlir/xla/ir/hlo_utils.h" @@ -175,7 +175,7 @@ void ConstOp::build(Builder* builder, OperationState& result, Attribute value) { //===----------------------------------------------------------------------===// OpFoldResult IotaOp::fold(ArrayRef<Attribute> operands) { - const auto output_type = getResult()->getType().cast<ShapedType>(); + const auto output_type = getResult().getType().cast<ShapedType>(); const auto output_size = output_type.getNumElements(); const auto dimension = iota_dimension().getSExtValue(); const auto max_dim_size = output_type.getDimSize(dimension); @@ -203,16 +203,15 @@ OpFoldResult IotaOp::fold(ArrayRef<Attribute> operands) { // AbsOp
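For intuition about IotaOp::fold above: when folding succeeds, the op collapses to a dense constant counting up along the iota dimension, e.g. [0, 1, 2, 3] for tensor<4xi32>. A sketch of the 1-D case (a simplified helper, not the actual fold body):

```cpp
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "mlir/IR/Attributes.h"     // TF:llvm-project
#include "mlir/IR/StandardTypes.h"  // TF:llvm-project

// Materialize 0..N-1 for a 1-D ranked integer tensor type.
mlir::DenseElementsAttr FoldIota1D(mlir::RankedTensorType ty) {
  unsigned width = ty.getElementType().getIntOrFloatBitWidth();
  llvm::SmallVector<llvm::APInt, 8> values;
  for (int64_t i = 0, e = ty.getNumElements(); i != e; ++i)
    values.push_back(llvm::APInt(width, i));
  return mlir::DenseElementsAttr::get(ty, values);
}
```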
//===----------------------------------------------------------------------===// -void AbsOp::build(Builder* builder, OperationState& result, Value* operand) { - auto shaped_type = operand->getType().cast<ShapedType>(); +void AbsOp::build(Builder* builder, OperationState& result, Value operand) { + auto shaped_type = operand.getType().cast<ShapedType>(); Type new_type; if (!shaped_type.getElementType().isa<ComplexType>()) { - new_type = operand->getType(); + new_type = operand.getType(); } else if (shaped_type.hasRank()) { - new_type = - RankedTensorType::get(shaped_type.getShape(), operand->getType()); + new_type = RankedTensorType::get(shaped_type.getShape(), operand.getType()); } else { - new_type = UnrankedTensorType::get(operand->getType()); + new_type = UnrankedTensorType::get(operand.getType()); } return AbsOp::build(builder, result, new_type, operand); @@ -222,10 +221,10 @@ void AbsOp::build(Builder* builder, OperationState& result, Value* operand) { // ConvertOp //===----------------------------------------------------------------------===// -void ConvertOp::build(Builder* builder, OperationState& result, Value* operand, +void ConvertOp::build(Builder* builder, OperationState& result, Value operand, Type result_element_ty) { Type result_ty; - Type operand_ty = operand->getType(); + Type operand_ty = operand.getType(); if (auto ranked_ty = operand_ty.dyn_cast<RankedTensorType>()) { result_ty = RankedTensorType::get(ranked_ty.getShape(), result_element_ty); } else { @@ -235,7 +234,7 @@ void ConvertOp::build(Builder* builder, OperationState& result, Value* operand, } OpFoldResult ConvertOp::fold(ArrayRef<Attribute> operands) { - if (getOperand()->getType() == getResult()->getType()) return getOperand(); + if (getOperand().getType() == getResult().getType()) return getOperand(); // If the operand is constant, we can do the conversion now. if (auto elementsAttr = operands.front().dyn_cast_or_null<ElementsAttr>()) { @@ -252,7 +251,7 @@ OpFoldResult ConvertOp::fold(ArrayRef<Attribute> operands) { static LogicalResult Verify(GetTupleElementOp op) { auto indexVal = op.index().getZExtValue(); - auto operandType = op.getOperand()->getType().cast<TupleType>(); + auto operandType = op.getOperand().getType().cast<TupleType>(); if (indexVal >= operandType.size()) { return op.emitOpError( llvm::formatv("index {0} is out of bounds of operand with size {1}", @@ -269,7 +268,7 @@ static LogicalResult Verify(GetTupleElementOp op) { OpFoldResult GetTupleElementOp::fold(ArrayRef<Attribute> operands) { if (auto tupleOp = - dyn_cast_or_null<TupleOp>(getOperand()->getDefiningOp())) { + dyn_cast_or_null<TupleOp>(getOperand().getDefiningOp())) { return tupleOp.getOperand(index().getLimitedValue()); } @@ -291,6 +290,25 @@ static LogicalResult Verify(TupleOp op) { return success(); } +//===----------------------------------------------------------------------===// +// AllToAllOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(AllToAllOp op) { + // If operand is ranked, size of split dimension should be a multiple of split + // count.
+ auto type = op.getOperand().getType().dyn_cast<RankedTensorType>(); + if (!type) return success(); + auto split_dim_size = type.getDimSize(op.split_dimension().getSExtValue()); + auto split_count = op.split_count().getSExtValue(); + if (split_dim_size % split_count != 0) { + return op.emitError() << "split dimension has size " << split_dim_size + << ", expected to be a multiple of split_count " + << split_count; + } + return success(); +} + //===----------------------------------------------------------------------===// // BroadcastOp //===----------------------------------------------------------------------===// @@ -305,9 +323,9 @@ static LogicalResult Verify(BroadcastOp op) { "broadcast_sizes has rank {0} instead of rank 1", sizesRank)); } - auto resultType = op.getResult()->getType().cast<RankedTensorType>(); + auto resultType = op.getResult().getType().cast<RankedTensorType>(); auto resultRank = resultType.getRank(); - auto operandType = op.operand()->getType().cast<RankedTensorType>(); + auto operandType = op.operand().getType().cast<RankedTensorType>(); auto operandRank = operandType.getRank(); auto sizesSize = sizesType.getNumElements(); auto expectedRank = operandRank + sizesSize; @@ -341,7 +359,7 @@ static LogicalResult Verify(BroadcastOp op) { //===----------------------------------------------------------------------===// static LogicalResult Verify(BroadcastInDimOp op) { - auto operandType = op.operand()->getType().cast<RankedTensorType>(); + auto operandType = op.operand().getType().cast<RankedTensorType>(); auto operandRank = operandType.getRank(); if (!op.broadcast_dimensions()) { if (operandRank == 0) { @@ -368,7 +386,7 @@ static LogicalResult Verify(BroadcastInDimOp op) { dimensionsSize, operandRank)); } - auto resultType = op.getResult()->getType().cast<RankedTensorType>(); + auto resultType = op.getResult().getType().cast<RankedTensorType>(); auto resultRank = resultType.getRank(); if (resultRank < operandRank) { return op.emitOpError( @@ -403,9 +421,9 @@ static LogicalResult Verify(BroadcastInDimOp op) { //===----------------------------------------------------------------------===// static LogicalResult Verify(ClampOp op) { - auto operandType = op.operand()->getType().cast<RankedTensorType>(); + auto operandType = op.operand().getType().cast<RankedTensorType>(); auto operandShape = operandType.getShape(); - auto minType = op.min()->getType().cast<RankedTensorType>(); + auto minType = op.min().getType().cast<RankedTensorType>(); auto minShape = minType.getShape(); if (minShape != operandShape && minType.getRank() != 0) { @@ -415,7 +433,7 @@ static LogicalResult Verify(ClampOp op) { llvm::make_range(operandShape.begin(), operandShape.end()))); } - auto maxType = op.max()->getType().cast<RankedTensorType>(); + auto maxType = op.max().getType().cast<RankedTensorType>(); auto maxShape = maxType.getShape(); if (maxShape != operandShape && maxType.getRank() != 0) { return op.emitOpError(llvm::formatv( @@ -431,9 +449,9 @@ static LogicalResult Verify(ClampOp op) { // ComplexOp //===----------------------------------------------------------------------===// -void ComplexOp::build(Builder* builder, OperationState& state, Value* lhs, - Value* rhs) { - auto type = lhs->getType(); +void ComplexOp::build(Builder* builder, OperationState& state, Value lhs, + Value rhs) { + auto type = lhs.getType(); auto element_ty = ComplexType::get(getElementTypeOrSelf(type)); Type result_ty; if (auto ranked_type = type.dyn_cast<RankedTensorType>()) { @@ -449,9 +467,9 @@ void ComplexOp::build(Builder* builder, OperationState& state, Value* lhs, OpFoldResult ComplexOp::fold(ArrayRef<Attribute> operands) { auto real_op = - dyn_cast_or_null<RealOp>(getOperand(0)->getDefiningOp()); + dyn_cast_or_null<RealOp>(getOperand(0).getDefiningOp()); auto imag_op = - dyn_cast_or_null<ImagOp>(getOperand(1)->getDefiningOp()); +
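The new AllToAll verifier enforces a simple divisibility rule. In isolation (a hypothetical helper, just to make the arithmetic concrete): a split dimension of size 8 with split_count 4 verifies, while split_count 3 would be rejected because 8 % 3 != 0.

```cpp
#include <cstdint>

// The divisibility rule enforced by the AllToAllOp verifier above.
bool SplitIsValid(int64_t split_dim_size, int64_t split_count) {
  return split_count > 0 && split_dim_size % split_count == 0;
}
```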
dyn_cast_or_null(getOperand(1).getDefiningOp()); if (real_op && imag_op && real_op.getOperand() == imag_op.getOperand()) { return real_op.getOperand(); } @@ -476,26 +494,26 @@ Type CreateRealType(Type type) { } } // namespace -void ImagOp::build(Builder* builder, OperationState& state, Value* val) { - build(builder, state, CreateRealType(val->getType()), val); +void ImagOp::build(Builder* builder, OperationState& state, Value val) { + build(builder, state, CreateRealType(val.getType()), val); } OpFoldResult ImagOp::fold(ArrayRef operands) { if (auto complex_op = - dyn_cast_or_null(getOperand()->getDefiningOp())) { + dyn_cast_or_null(getOperand().getDefiningOp())) { return complex_op.getOperand(1); } return {}; } -void RealOp::build(Builder* builder, OperationState& state, Value* val) { - build(builder, state, CreateRealType(val->getType()), val); +void RealOp::build(Builder* builder, OperationState& state, Value val) { + build(builder, state, CreateRealType(val.getType()), val); } OpFoldResult RealOp::fold(ArrayRef operands) { if (auto complex_op = - dyn_cast_or_null(getOperand()->getDefiningOp())) { + dyn_cast_or_null(getOperand().getDefiningOp())) { return complex_op.getOperand(0); } @@ -512,12 +530,12 @@ OpFoldResult ConcatenateOp::fold(ArrayRef operands) { } static LogicalResult Verify(ConcatenateOp op) { - auto firstType = op.getOperand(0)->getType().cast(); + auto firstType = op.getOperand(0).getType().cast(); auto firstShape = firstType.getShape(); int numOperands = op.getNumOperands(); for (int i = 1; i < numOperands; i++) { - auto secondType = op.getOperand(i)->getType().cast(); + auto secondType = op.getOperand(i).getType().cast(); if (firstType.getRank() != secondType.getRank()) { return op.emitOpError( @@ -552,18 +570,18 @@ void DynamicSliceOp::getCanonicalizationPatterns( //===----------------------------------------------------------------------===// OpFoldResult ReshapeOp::fold(ArrayRef operands) { - if (getOperand()->getType() == getType()) { + if (getOperand().getType() == getType()) { return getOperand(); } if (auto prev_op = - dyn_cast_or_null(getOperand()->getDefiningOp())) { + dyn_cast_or_null(getOperand().getDefiningOp())) { setOperand(prev_op.getOperand()); return getResult(); } if (auto elements = operands.front().dyn_cast_or_null()) { - return elements.reshape(getResult()->getType().cast()); + return elements.reshape(getResult().getType().cast()); } return {}; @@ -611,9 +629,9 @@ void ReduceOp::build(Builder* builder, OperationState& state, SmallVector result_ty; result_ty.reserve(operands.size()); - for (Value* operand : operands) { + for (Value operand : operands) { result_ty.push_back( - GetReduceResultType(operand->getType(), dimensions, builder)); + GetReduceResultType(operand.getType(), dimensions, builder)); } build(builder, state, result_ty, operands, init_values, dimensions); } @@ -622,7 +640,7 @@ LogicalResult ReduceOp::fold(ArrayRef operands, SmallVectorImpl& results) { // No dimensions to reduce. 
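// In that case the reduction is an identity: each input operand folds to // itself, e.g. a reduce with dimensions = [] over %input simply yields %input.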
if (dimensions().getNumElements() == 0) { - for (Value* input : this->operands()) { + for (Value input : this->operands()) { results.push_back(input); } return success(); @@ -645,8 +663,8 @@ static LogicalResult Verify(SelectOp op) { //===----------------------------------------------------------------------===// static LogicalResult Verify(PadOp op) { - auto input_type = op.operand()->getType().cast<RankedTensorType>(); - auto pad_type = op.padding_value()->getType().cast<RankedTensorType>(); + auto input_type = op.operand().getType().cast<RankedTensorType>(); + auto pad_type = op.padding_value().getType().cast<RankedTensorType>(); if (pad_type.getRank() != 0) { return op.emitOpError( @@ -678,7 +696,7 @@ static LogicalResult Verify(PadOp op) { auto input_shape = input_type.getShape(); auto output_shape = - op.getResult()->getType().cast<RankedTensorType>().getShape(); + op.getResult().getType().cast<RankedTensorType>().getShape(); if (input_shape.size() != output_shape.size()) { return op.emitOpError( llvm::formatv("operand rank ({0}) and result rank({0}) should match", @@ -757,15 +775,15 @@ static Type GetBroadcastType(Builder* builder, Type x, Type y, } } // namespace -#define BINARY_BUILDER(Op) \ - void Op::build(Builder* builder, OperationState& result, Value* left, \ - Value* right, DenseIntElementsAttr broadcast_dimensions) { \ - auto type = GetBroadcastType(builder, left->getType().cast<TensorType>(), \ - right->getType().cast<TensorType>(), \ - getElementTypeOrSelf(right->getType()), \ - broadcast_dimensions); \ - return Op::build(builder, result, type, left, right, \ - broadcast_dimensions); \ +#define BINARY_BUILDER(Op) \ + void Op::build(Builder* builder, OperationState& result, Value left, \ + Value right, DenseIntElementsAttr broadcast_dimensions) { \ + auto type = GetBroadcastType(builder, left.getType().cast<TensorType>(), \ + right.getType().cast<TensorType>(), \ + getElementTypeOrSelf(right.getType()), \ + broadcast_dimensions); \ + return Op::build(builder, result, type, left, right, \ + broadcast_dimensions); \ } BINARY_BUILDER(AddOp); @@ -790,7 +808,7 @@ BINARY_BUILDER(XorOp); // SliceOp //===----------------------------------------------------------------------===// -void SliceOp::build(Builder* builder, OperationState& result, Value* operand, +void SliceOp::build(Builder* builder, OperationState& result, Value operand, DenseIntElementsAttr start_indices, DenseIntElementsAttr limit_indices, DenseIntElementsAttr strides) { @@ -811,11 +829,11 @@ static int64_t InferSliceDim(int64_t input_dim, int64_t start, int64_t end, return llvm::divideCeil(end - start, stride); } -Type SliceOp::InferOutputTypes(Builder* builder, Value* operand, +Type SliceOp::InferOutputTypes(Builder* builder, Value operand, DenseIntElementsAttr start_indices, DenseIntElementsAttr limit_indices, DenseIntElementsAttr strides) { - Type ty = operand->getType(); + Type ty = operand.getType(); RankedTensorType ranked_ty = ty.dyn_cast<RankedTensorType>(); if (!ranked_ty) return ty; int64_t rank = ranked_ty.getRank(); @@ -852,7 +870,7 @@ void SortOp::build(Builder* builder, OperationState& state, ValueRange operands, SmallVector element_types; element_types.reserve(operands.size()); - for (Value* operand : operands) element_types.push_back(operand->getType()); + for (Value operand : operands) element_types.push_back(operand.getType()); state.addTypes(builder->getTupleType(element_types)); state.addRegion(); @@ -863,15 +881,14 @@ static LogicalResult Verify(SortOp op) { if (operands.empty()) return op.emitOpError("requires at least one input"); // TODO(antiagainst): verify partially dynamic shapes - if (llvm::all_of(operands, [](Value* operand) { - return
operand->getType().cast().hasRank(); + if (llvm::all_of(operands, [](Value operand) { + return operand.getType().cast().hasRank(); })) { ArrayRef input_shape = - (*operands.begin())->getType().cast().getShape(); + (*operands.begin()).getType().cast().getShape(); - if (llvm::any_of(llvm::drop_begin(operands, 1), [&](Value* operand) { - return operand->getType().cast().getShape() != - input_shape; + if (llvm::any_of(llvm::drop_begin(operands, 1), [&](Value operand) { + return operand.getType().cast().getShape() != input_shape; })) return op.emitOpError("requires all inputs to have the same dimensions"); @@ -889,10 +906,10 @@ static LogicalResult Verify(SortOp op) { for (auto indexed_operand : llvm::enumerate(operands)) { int index = indexed_operand.index(); Type element_type = - indexed_operand.value()->getType().cast().getElementType(); + indexed_operand.value().getType().cast().getElementType(); Type tensor_type = RankedTensorType::get({}, element_type); for (int i : {2 * index, 2 * index + 1}) { - Type arg_type = block.getArgument(i)->getType(); + Type arg_type = block.getArgument(i).getType(); if (arg_type != tensor_type) return op.emitOpError("comparator block argument #") << i << " should be of type " << tensor_type << " but got " @@ -926,7 +943,7 @@ static LogicalResult Verify(TransposeOp op) { } auto permutationSize = permutationType.getNumElements(); - auto operandType = op.operand()->getType().dyn_cast(); + auto operandType = op.operand().getType().dyn_cast(); if (operandType) { auto operandRank = operandType.getRank(); if (operandRank != permutationSize) { @@ -936,7 +953,7 @@ static LogicalResult Verify(TransposeOp op) { } } - auto resultType = op.getResult()->getType().dyn_cast(); + auto resultType = op.getResult().getType().dyn_cast(); if (resultType) { auto resultRank = resultType.getRank(); if (resultRank != permutationSize) { @@ -971,15 +988,15 @@ static LogicalResult Verify(TransposeOp op) { //===----------------------------------------------------------------------===// void GetTupleElementOp::build(Builder* builder, OperationState& result, - Value* tuple, int32_t index) { - if (auto tuple_type = tuple->getType().dyn_cast()) { + Value tuple, int32_t index) { + if (auto tuple_type = tuple.getType().dyn_cast()) { auto element_type = tuple_type.getType(index); build(builder, result, element_type, tuple, builder->getI32IntegerAttr(index)); return; } - build(builder, result, tuple->getType(), tuple, + build(builder, result, tuple.getType(), tuple, builder->getI32IntegerAttr(index)); } @@ -992,7 +1009,7 @@ void TupleOp::build(Builder* builder, OperationState& result, SmallVector types; types.reserve(values.size()); for (auto val : values) { - types.push_back(val->getType()); + types.push_back(val.getType()); } build(builder, result, builder->getTupleType(types), values); @@ -1011,10 +1028,10 @@ void UnaryEinsumOp::getCanonicalizationPatterns( // CompareOp //===----------------------------------------------------------------------===// -void CompareOp::build(Builder* builder, OperationState& result, Value* lhs, - Value* rhs, DenseIntElementsAttr broadcast_dimensions, +void CompareOp::build(Builder* builder, OperationState& result, Value lhs, + Value rhs, DenseIntElementsAttr broadcast_dimensions, StringAttr comparison_direction) { - auto new_type = GetBroadcastType(builder, lhs->getType(), rhs->getType(), + auto new_type = GetBroadcastType(builder, lhs.getType(), rhs.getType(), builder->getI1Type(), broadcast_dimensions); build(builder, result, new_type, lhs, rhs, 
broadcast_dimensions, comparison_direction); diff --git a/tensorflow/compiler/mlir/xla/ir/hlo_ops.h b/tensorflow/compiler/mlir/xla/ir/hlo_ops.h index 9610a787b7d..d0bc9619db9 100644 --- a/tensorflow/compiler/mlir/xla/ir/hlo_ops.h +++ b/tensorflow/compiler/mlir/xla/ir/hlo_ops.h @@ -19,16 +19,16 @@ limitations under the License. #define TENSORFLOW_COMPILER_MLIR_XLA_IR_HLO_OPS_H_ #include "llvm/ADT/StringRef.h" -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Dialect.h" // TF:local_config_mlir -#include "mlir/IR/DialectImplementation.h" // TF:local_config_mlir -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/OpDefinition.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir -#include "mlir/Support/Functional.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Dialect.h" // TF:llvm-project +#include "mlir/IR/DialectImplementation.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/OpDefinition.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project +#include "mlir/Support/Functional.h" // TF:llvm-project namespace mlir { class OpBuilder; diff --git a/tensorflow/compiler/mlir/xla/ir/hlo_ops.td b/tensorflow/compiler/mlir/xla/ir/hlo_ops.td index 6eeb32e804c..5c30ff8f134 100644 --- a/tensorflow/compiler/mlir/xla/ir/hlo_ops.td +++ b/tensorflow/compiler/mlir/xla/ir/hlo_ops.td @@ -56,7 +56,7 @@ def HLO_Tensor : TensorOf<[AnyFloat, AnyInteger, AnyComplex]>; def HLO_ComplexTensor : TensorOf<[AnyComplex]>; -def HLO_Tuple : NestedTupleOf<[HLO_Tensor]>; +def HLO_Tuple : NestedTupleOf<[HLO_Tensor, HLO_Token]>; def HLO_TensorOrTuple : AnyTypeOf<[HLO_Tensor, HLO_Tuple]>; @@ -76,6 +76,9 @@ def HLO_FpOrComplexTensor : TensorOf<[AnyFloat, AnyComplex]>; // Any int, floating-point or complex tensor types def HLO_IntFpOrComplexTensor : TensorOf<[HLO_Int, AnyFloat, AnyComplex]>; +// Any pred, int or floating-point tensor types +def HLO_PredIntOrFpTensor : TensorOf<[HLO_Pred, HLO_Int, AnyFloat]>; + //===----------------------------------------------------------------------===// // XLA nullary op definitions. 
//===----------------------------------------------------------------------===// @@ -128,7 +131,7 @@ class HLO_UnaryElementwiseOp traits, def HLO_AbsOp: HLO_UnaryElementwiseOp<"abs", [NoSideEffect, SameOperandsAndResultShape], HLO_Tensor>, BASE_HLO_AbsOp { let builders = [OpBuilder< - "Builder *builder, OperationState &result, Value *operand" + "Builder *builder, OperationState &result, Value operand" >]; } @@ -140,7 +143,7 @@ def HLO_ConvertOp : HLO_UnaryElementwiseOp< BASE_HLO_ConvertOp { let builders = [OpBuilder< - "Builder *, OperationState &tblgen_state, Value *operand, " + "Builder *, OperationState &tblgen_state, Value operand, " "Type result_element_ty" >]; @@ -149,6 +152,10 @@ def HLO_ConvertOp : HLO_UnaryElementwiseOp< let hasCustomHLOConverter = 1; } +def HLO_ClzOp: HLO_UnaryElementwiseOp<"count_leading_zeros", + [NoSideEffect, SameOperandsAndResultType], HLO_IntTensor>, + BASE_HLO_ClzOp; + def HLO_CosOp: HLO_UnaryElementwiseOp<"cos", [NoSideEffect, SameOperandsAndResultType], HLO_FpOrComplexTensor>, BASE_HLO_CosOp; @@ -191,6 +198,9 @@ def HLO_PopulationCountOp: HLO_UnaryElementwiseOp<"popcnt", [NoSideEffect, SameOperandsAndResultType], HLO_IntTensor>, BASE_HLO_PopulationCountOp; +def HLO_RoundOp: HLO_UnaryElementwiseOp<"round", + [NoSideEffect, SameOperandsAndResultType], HLO_FpTensor>, BASE_HLO_RoundOp; + def HLO_RsqrtOp: HLO_UnaryElementwiseOp<"rsqrt", [NoSideEffect, SameOperandsAndResultType], HLO_FpOrComplexTensor>, BASE_HLO_RsqrtOp; @@ -220,7 +230,7 @@ def HLO_ComplexOp: HLO_Op<"complex", [NoSideEffect, SameOperandsElementType, SameOperandsAndResultShape]>, BASE_HLO_ComplexOp { let builders = [OpBuilder< - "Builder *, OperationState &tblgen_state, Value *lhs, Value *rhs">]; + "Builder *, OperationState &tblgen_state, Value lhs, Value rhs">]; let arguments = (ins HLO_FpTensor:$lhs, HLO_FpTensor:$rhs); let results = (outs HLO_ComplexTensor); @@ -230,7 +240,7 @@ def HLO_ComplexOp: HLO_Op<"complex", def HLO_ImagOp: HLO_Op< "imag", [NoSideEffect, SameOperandsAndResultShape]>, BASE_HLO_ImagOp { let builders = [OpBuilder< - "Builder *, OperationState &tblgen_state, Value *val">]; + "Builder *, OperationState &tblgen_state, Value val">]; let arguments = (ins HLO_ComplexTensor); let results = (outs HLO_FpTensor); @@ -240,7 +250,7 @@ def HLO_ImagOp: HLO_Op< def HLO_RealOp: HLO_Op< "real", [NoSideEffect, SameOperandsAndResultShape]>, BASE_HLO_RealOp { let builders = [OpBuilder< - "Builder *, OperationState &tblgen_state, Value *val">]; + "Builder *, OperationState &tblgen_state, Value val">]; let arguments = (ins HLO_ComplexTensor); let results = (outs HLO_FpTensor); @@ -261,7 +271,7 @@ class HLO_BinaryElementwiseOp traits> : ); let builders = [OpBuilder< - "Builder *builder, OperationState &result, Value *left, Value* right, " + "Builder *builder, OperationState &result, Value left, Value right, " "DenseIntElementsAttr broadcast_dimensions" >]; @@ -324,6 +334,101 @@ def HLO_AndOp: HLO_BinaryLogicalElementwiseOp<"and">, BASE_HLO_AndOp; def HLO_OrOp: HLO_BinaryLogicalElementwiseOp<"or">, BASE_HLO_OrOp; def HLO_XorOp : HLO_BinaryLogicalElementwiseOp<"xor">, BASE_HLO_XorOp; +//===----------------------------------------------------------------------===// +// XLA communication op definitions. +//===----------------------------------------------------------------------===// + +// Represents a unique identifier for each Send/Recv instruction pair or +// optionally for collective instructions (AllReduce, CollectivePermute, +// AllToAll). 
Non-positive channel_id handle is equivalent to no channel id. +def ChannelHandle : StructAttr<"ChannelHandle", HLO_Dialect, [ + StructFieldAttr<"handle", I64Attr>, + StructFieldAttr<"type", I64Attr>]> { + let description = "two 64-bit integers 'handle' and 'type'"; +} + +// InfeedOp corresponds to the 'InfeedWithToken' xla client API and not 'Infeed'. +// InfeedWithToken allows ordering of infeed HLO instructions using tokens. +def HLO_InfeedOp : HLO_Op<"infeed", []> { + + string summary = "Infeed operator"; + + string description = [{ + Reads a single data item from the implicit Infeed streaming interface of + the device, interpreting the data as the given shape, and returns an XlaOp + of the data. Multiple Infeed operations are allowed in a computation, but + there must be a total order among the Infeed operations. + + See https://www.tensorflow.org/xla/operation_semantics#infeed. + }]; + + let arguments = (ins + HLO_Token:$token, + DefaultValuedAttr<StrAttr, "">:$infeed_config + ); + let results = (outs HLO_Tuple); + let hasCustomHLOConverter = 1; +} + +// OutfeedOp corresponds to the 'OutfeedWithToken' xla client API and not 'Outfeed'. +// OutfeedWithToken allows ordering of outfeed HLO instructions using tokens. +def HLO_OutfeedOp : HLO_Op<"outfeed", []> { + + string summary = "Outfeed operator"; + + string description = [{ + Generates outgoing data transfers for the given data. It takes data and a + token type operand and produces a token type value. Tokens are used for + ordering side-effecting operations. + + See https://www.tensorflow.org/xla/operation_semantics#outfeed. + }]; + + let arguments = (ins + HLO_TensorOrTuple:$operand, + HLO_Token:$token, + DefaultValuedAttr<StrAttr, "">:$outfeed_config + ); + let results = (outs HLO_Token); + let hasCustomHLOConverter = 1; +} + +def HLO_SendOp : HLO_Op<"send", []> { + + string summary = "Send operator"; + + string description = [{ + Sends the given operand data to a Recv instruction in another computation + that shares the same channel handle. Does not return any data. Similar to + the Recv operation, the Send operation represents synchronous communication, + and is internally decomposed into 2 HLO instructions (Send and SendDone) to + enable asynchronous data transfers. + + See https://www.tensorflow.org/xla/operation_semantics#send. + }]; + + let arguments = (ins + HLO_TensorOrTuple:$operand, + HLO_Token:$token, + ChannelHandle:$channel_id, + DefaultValuedAttr<BoolAttr, "false">:$is_host_transfer + ); + + let results = (outs HLO_Token); + let hasCustomHLOConverter = 1; +} + +//===----------------------------------------------------------------------===// +// XLA parallelism related op definitions. +//===----------------------------------------------------------------------===// + +def HLO_ReplicaIdOp : HLO_Op<"replica_id", [NoSideEffect]>, + BASE_HLO_ReplicaIdOp { + // TODO(prakalps): The output should be an unsigned 32-bit integer, but mlir + // does not differentiate between signed and unsigned int. + let results = (outs I32Tensor); +} + +//===----------------------------------------------------------------------===// // XLA control flow op definitions.
//===----------------------------------------------------------------------===// @@ -343,7 +448,6 @@ def HLO_AfterAllOp : HLO_Op<"after_all", []> { let arguments = (ins Variadic:$operands); let results = (outs HLO_Token); - let hasCustomHLOConverter = 1; } def HLO_ConditionalOp: HLO_Op<"conditional", [NoSideEffect]> { @@ -390,15 +494,6 @@ def HLO_WhileOp: HLO_Op<"while", [NoSideEffect, SameOperandsAndResultType]> { let hasCustomHLOConverter = 1; } -// Represents a unique identifier for each Send/Recv instruction pair or -// optionally for collective instructions (AllReduce, CollectivePermute, -// AllToAll). Non-positive channel_id handle is equivalent to no channel id. -def ChannelHandle : StructAttr<"ChannelHandle", HLO_Dialect, [ - StructFieldAttr<"handle", I64Attr>, - StructFieldAttr<"type", I64Attr>]> { - let description = "two 64-bit integers 'handle' and 'type'"; -} - def HLO_AllReduceOp : HLO_Op<"all_reduce", [NoSideEffect, SameOperandsAndResultType]>, BASE_HLO_AllReduceOp { @@ -413,6 +508,19 @@ def HLO_AllReduceOp : HLO_Op<"all_reduce", let hasCustomHLOConverter = 1; } +def HLO_AllToAllOp : HLO_Op<"all_to_all", + [NoSideEffect, SameOperandsElementType, SameOperandsShape]>, BASE_HLO_AllToAllOp { + + let arguments = (ins + HLO_Tensor:$operand, + I64Attr:$split_dimension, + I64Attr:$concat_dimension, + I64Attr:$split_count, + I64ElementsAttr:$replica_groups + ); + let results = (outs HLO_Tensor); +} + def HLO_ReduceOp: HLO_Op<"reduce", [ NoSideEffect, SameVariadicOperandSize, @@ -458,7 +566,7 @@ def HLO_GetTupleElementOp: HLO_Op<"get_tuple_element", [NoSideEffect]>, BASE_HLO let builders = [OpBuilder< "Builder *builder, OperationState &results, " - "Value* value, int32_t index">]; + "Value value, int32_t index">]; } def HLO_TupleOp : HLO_Op<"tuple", [NoSideEffect]>, BASE_HLO_TupleOp { @@ -469,8 +577,6 @@ def HLO_TupleOp : HLO_Op<"tuple", [NoSideEffect]>, BASE_HLO_TupleOp { "Builder *builder, OperationState &results, " "ValueRange values">]; - // TupleOp has special conversion logic to HLO. - let hasCustomHLOConverter = 1; } def HLO_CompareOp: HLO_Op<"compare", @@ -482,14 +588,14 @@ def HLO_CompareOp: HLO_Op<"compare", HLO_ComparisonDirectionAttr:$comparison_direction ); let builders = [OpBuilder< - "Builder *builder, OperationState &result, Value *left, Value* right, " + "Builder *builder, OperationState &result, Value left, Value right, " "DenseIntElementsAttr broadcast_dimensions, " "StringAttr comparison_direction" >]; let results = (outs HLO_PredTensor); let builders = [OpBuilder< - "Builder *builder, OperationState &result, Value *lhs, Value *rhs, " + "Builder *builder, OperationState &result, Value lhs, Value rhs, " "DenseIntElementsAttr broadcast_dimensions, StringAttr comparison_direction" >]; } @@ -512,7 +618,7 @@ def HLO_SliceOp: HLO_Op< let results = (outs HLO_Tensor); let builders = [OpBuilder< - "Builder *builder, OperationState &result, Value *operand, " + "Builder *builder, OperationState &result, Value operand, " "DenseIntElementsAttr start_indices, DenseIntElementsAttr limit_indices, " "DenseIntElementsAttr strides" >]; @@ -520,7 +626,7 @@ def HLO_SliceOp: HLO_Op< let extraClassDeclaration = [{ // Infers output type for given operand and attributes. Result type is // unranked if any of the attributes is illegal. 
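// A worked example (hypothetical values): for a tensor<10xf32> operand with // start_indices = [1], limit_indices = [9] and strides = [2], the inferred // type is tensor<4xf32>, since ceil((9 - 1) / 2) == 4.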
- static Type InferOutputTypes(Builder *builder, Value *operand, + static Type InferOutputTypes(Builder *builder, Value operand, DenseIntElementsAttr start_indices, DenseIntElementsAttr limit_indices, DenseIntElementsAttr strides); @@ -572,8 +678,8 @@ def HLO_BatchNormGradOp : HLO_Op<"batch_norm_grad", [NoSideEffect]>, let results = (outs HLO_Tuple); } -def HLO_BatchNormInferenceOp : HLO_Op<"batch_norm_inference", [NoSideEffect]>, - BASE_HLO_BatchNormInferenceOp { +def HLO_BatchNormInferenceOp : HLO_Op<"batch_norm_inference", + [NoSideEffect, SameOperandsAndResultElementType]>, BASE_HLO_BatchNormInferenceOp { let arguments = (ins HLO_Tensor:$operand, @@ -634,6 +740,16 @@ def HLO_BroadcastInDimOp : HLO_Op<"broadcast_in_dim", let hasCustomHLOConverter = 1; } +def HLO_CholeskyOp : HLO_Op<"cholesky", + [NoSideEffect, SameOperandsAndResultElementType]>, BASE_HLO_CholeskyOp { + let arguments = (ins + HLO_FpOrComplexTensor:$a, + DefaultValuedAttr:$lower + ); + + let results = (outs HLO_FpOrComplexTensor); +} + def HLO_ClampOp : HLO_Op<"clamp", [NoSideEffect, SameOperandsAndResultElementType]>, BASE_HLO_ClampOp { let arguments = (ins @@ -657,8 +773,6 @@ def HLO_ConcatenateOp : HLO_Op<"concatenate", let hasFolder = 1; - // TODO(b/129422361) ConcatOp has special conversion logic to HLO. - let hasCustomHLOConverter = 1; } def HLO_CrossReplicaSumOp : HLO_Op<"cross-replica-sum", @@ -708,8 +822,6 @@ def HLO_ConvOp : HLO_Op<"conv", [NoSideEffect]>, BASE_HLO_ConvOp { let results = (outs HLO_Tensor); - // TODO(b/129422361): Conv Op has special conversion logic to HLO. - let hasCustomHLOConverter = 1; } def HLO_CopyOp: HLO_Op<"copy", [NoSideEffect, SameOperandsAndResultType]> { @@ -751,7 +863,9 @@ def HLO_DotGeneralOp: HLO_Op<"dot_general", [NoSideEffect]>, BASE_HLO_DotGeneral let results = (outs HLO_Tensor); } -def BASE_EinsumOp { +// Define Base Einsum op within the HLO dialect as these are client ops and +// therefore this class is not common between HLO and LHLO ops. +class BASE_EinsumOp { string summary = "Einsum operator"; string description = [{ @@ -760,7 +874,7 @@ def BASE_EinsumOp { }]; } -def HLO_EinsumOp: HLO_Op<"einsum", [NoSideEffect]> { +def HLO_EinsumOp: HLO_Op<"einsum", [NoSideEffect]>, BASE_EinsumOp { let arguments = (ins HLO_Tensor:$lhs, HLO_Tensor:$rhs, @@ -773,7 +887,7 @@ def HLO_EinsumOp: HLO_Op<"einsum", [NoSideEffect]> { // side HLO ops. } -def HLO_UnaryEinsumOp: HLO_Op<"unary_einsum", [NoSideEffect]> { +def HLO_UnaryEinsumOp: HLO_Op<"unary_einsum", [NoSideEffect]>, BASE_EinsumOp { let arguments = (ins HLO_Tensor:$operand, StrAttr:$einsum_config @@ -796,9 +910,6 @@ def HLO_FftOp: HLO_Op<"fft", [NoSideEffect]>, BASE_HLO_FftOp { ); let results = (outs HLO_Tensor); - - // TODO(b/129422361) Attributes are not supported by the codegen. 
- let hasCustomHLOConverter = 1; } def GatherDimensionNumbers : StructAttr<"GatherDimensionNumbers", HLO_Dialect, @@ -819,8 +930,6 @@ def HLO_GatherOp: HLO_Op<"gather", [NoSideEffect]>, BASE_HLO_GatherOp { ); let results = (outs HLO_Tensor); - - let hasCustomHLOConverter = 1; } def HLO_GetDimensionSizeOp: HLO_Op<"get_dimension_size", [NoSideEffect]>, @@ -896,6 +1005,16 @@ def HLO_SelectAndScatterOp: HLO_Op<"select_and_scatter", let hasCustomHLOConverter = 1; } +def HLO_SetDimensionSizeOp: HLO_Op<"set_dimension_size", [NoSideEffect]>, + BASE_HLO_SetDimensionSizeOp { + let arguments = (ins + HLO_Tensor:$operand, + I32Tensor:$size, + I32Attr:$dimension + ); + let results = (outs HLO_Tensor); +} + def HLO_SortOp : HLO_Op<"sort", [NoSideEffect]>, BASE_HLO_SortOp { let arguments = (ins Variadic:$operands, @@ -926,9 +1045,6 @@ def HLO_ReverseOp: HLO_Op<"reverse", let results = (outs HLO_Tensor); let hasFolder = 1; - - // TODO(b/129422361): ReverseOp has a custom constructor for HLO. - let hasCustomHLOConverter = 1; } def HLO_PadOp: HLO_Op<"pad", @@ -1029,12 +1145,24 @@ def HLO_TorchIndexSelectOp : HLO_Op<"torch_index_select", [NoSideEffect]> { //===----------------------------------------------------------------------===// def HLO_RngUniformOp : HLO_Op<"rng_uniform", []>, BASE_HLO_RngUniformOp { let arguments = (ins - HLO_Tensor:$a, - HLO_Tensor:$b, + HLO_PredIntOrFpTensor:$a, + HLO_PredIntOrFpTensor:$b, I64Tensor:$shape ); - let results = (outs HLO_Tensor); + let results = (outs HLO_PredIntOrFpTensor); + + let hasCustomHLOConverter = 1; +} + +def HLO_RngNormalOp : HLO_Op<"rng_normal", []>, BASE_HLO_RngNormalOp { + let arguments = (ins + HLO_FpTensor:$mu, + HLO_FpTensor:$sigma, + I64Tensor:$shape + ); + + let results = (outs HLO_FpTensor); let hasCustomHLOConverter = 1; } diff --git a/tensorflow/compiler/mlir/xla/ir/hlo_ops_base.td b/tensorflow/compiler/mlir/xla/ir/hlo_ops_base.td index 3be2c26a1bf..f2010bb56cb 100644 --- a/tensorflow/compiler/mlir/xla/ir/hlo_ops_base.td +++ b/tensorflow/compiler/mlir/xla/ir/hlo_ops_base.td @@ -68,6 +68,17 @@ class BASE_HLO_CeilOp { }]; } +class BASE_HLO_ClzOp { + string summary = "Count-leading-zeros (Clz) operator"; + + string description = [{ + Returns the number of leading zeros in each operand element-wise. + + See + https://www.tensorflow.org/xla/operation_semantics#element-wise_unary_functions. + }]; +} + class BASE_HLO_ComplexOp { string summary = "Complex operator"; @@ -228,6 +239,18 @@ class BASE_HLO_RealOp { }]; } +class BASE_HLO_RoundOp { + string summary = "Round operator"; + + string description = [{ + Returns `Round(operand)` element-wise, rounding to nearest integer with + half-way cases rounding away from zero. + + See + https://www.tensorflow.org/xla/operation_semantics#element-wise_unary_functions. + }]; +} + class BASE_HLO_RsqrtOp { string summary = "Reciprocal Square-root operator"; @@ -465,6 +488,26 @@ class BASE_HLO_XorOp { }]; } +//===----------------------------------------------------------------------===// +// XLA parallelism related op definitions. +//===----------------------------------------------------------------------===// + +class BASE_HLO_ReplicaIdOp { + string summary = "ReplicaId operator"; + + string description = [{ + Returns the unique ID (int32 scalar) of the replica. + + The unique ID of each replica is an unsigned integer in the interval [0, N), + where N is the number of replicas. Since all the replicas are running the + same program, a ReplicaId() call in the program will return a different + value on each replica. 
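+ + For example, with N = 4 replicas the op evaluates to 0, 1, 2 and 3 on the + respective replicas.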
+ + See https://www.tensorflow.org/xla/operation_semantics#replicaid. + }]; +} + + class BASE_HLO_AllReduceOp { string summary = "AllReduce operator"; @@ -626,6 +669,39 @@ class BASE_HLO_DynamicUpdateSliceOp { // XLA Other op definitions. //===----------------------------------------------------------------------===// +class BASE_HLO_AllToAllOp { + string summary = "AllToAll"; + + string description = [{ + AllToAll is a collective operation that sends data from all cores to all + cores. It has two phases: + - The scatter phase. On each core, the operand is split into `split_count` + number of blocks along the `split_dimension`, and the blocks are + scattered to all cores, e.g., the i-th block is sent to the i-th core. + - The gather phase. Each core concatenates the received blocks along the + `concat_dimension`. + + The participating cores can be configured by: + - replica_groups: each ReplicaGroup contains a list of replica ids + participating in the computation (the replica id for the current replica + can be retrieved using the ReplicaId op). AllToAll will be applied within + subgroups in the specified order. For example, + `replica_groups` = {{1,2,3}, {4,5,0}} means that an AllToAll will be applied + within replicas {1, 2, 3}, and in the gather phase, the received blocks + will be concatenated in the order 1, 2, 3. Then, another AllToAll will be + applied within replicas 4, 5, 0, and the concatenation order is also + 4, 5, 0. If `replica_groups` is empty, all replicas belong to one group, + in the concatenation order of their appearance. + + Prerequisites: + - The dimension size of the operand on the split_dimension is divisible by + `split_count`. + - The operand's shape is not a tuple. + + See https://www.tensorflow.org/xla/operation_semantics#alltoall. + }]; +} + class BASE_HLO_BatchNormGradOp { string summary = "Batch Normalization Gradient"; @@ -707,6 +783,32 @@ class BASE_HLO_BroadcastInDimOp { }]; } +class BASE_HLO_CholeskyOp { + string summary = "Cholesky operator"; + + string description = [{ + Computes the Cholesky decomposition of a batch of symmetric (Hermitian) + positive definite matrices. + + If lower is true, computes lower-triangular matrices l such that + `a=l.Transpose(l)`. If lower is false, computes upper-triangular matrices u such + that `a=Transpose(u).u`. + + Input data is read only from the lower/upper triangle of a, depending on the + value of lower. Values from the other triangle are ignored. Output data is + returned in the same triangle; the values in the other triangle are + implementation-defined and may be anything. + + If the rank of a is greater than 2, a is treated as a batch of matrices, where + all dimensions except the minor 2 are batch dimensions. + + If a is not symmetric (Hermitian) positive definite, the result is + implementation-defined. + + See https://www.tensorflow.org/xla/operation_semantics#cholesky. + }]; +} + class BASE_HLO_ClampOp { string summary = "Clamp operator"; @@ -846,6 +948,18 @@ class BASE_HLO_SelectAndScatterOp { }]; } +class BASE_HLO_SetDimensionSizeOp { + string summary = "SetDimensionSize operator"; + + string description = [{ + Sets the dynamic size of the operand's given dimension. Passes the operand + through as the result, with the dynamic dimension tracked by the compiler. + Padded values will be ignored by downstream reduction ops. + + See https://www.tensorflow.org/xla/operation_semantics#setdimensionsize.
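+ + For example (an illustrative sketch, not from this change): with a + tensor<4xf32> operand, dimension = 0 and a runtime size of 3, the result + keeps the static type tensor<4xf32>, but a subsequent reduce over that + dimension only accumulates the first 3 elements.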
+ }]; +} + class BASE_HLO_SortOp { string summary = "Sort operator"; @@ -895,11 +1009,26 @@ class BASE_HLO_RngUniformOp { string summary = "RNG with uniform distribution."; string description = [{ - Constructs an output of a given shape with random numbers generated following - the uniform distribution over the interval `[a,b)`. + Constructs an output of a given shape with random numbers generated + following the uniform distribution over the interval `[a,b)`. The parameters + and output element type have to be a boolean type, an integral type or a + floating point type, and the types have to be consistent. See https://www.tensorflow.org/xla/operation_semantics#rnguniform. }]; } +class BASE_HLO_RngNormalOp { + string summary = "RNG with normal distribution."; + + string description = [{ + Constructs an output of a given shape with random numbers generated + following the normal distribution with parameters `mu` and `sigma`. The + parameters and output shape have to have a floating point element type. + The parameters furthermore have to be scalar valued. + + See https://www.tensorflow.org/xla/operation_semantics#rngnormal. + }]; +} + #endif // HLO_OPS_BASE diff --git a/tensorflow/compiler/mlir/xla/ir/hlo_utils.cc b/tensorflow/compiler/mlir/xla/ir/hlo_utils.cc index 7d3e2ca2384..08f4dc536cf 100644 --- a/tensorflow/compiler/mlir/xla/ir/hlo_utils.cc +++ b/tensorflow/compiler/mlir/xla/ir/hlo_utils.cc @@ -17,15 +17,14 @@ limitations under the License. #include -#include "mlir/IR/Attributes.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project namespace mlir { namespace xla { -DenseIntElementsAttr getBroadcastDimensionsAttr(Builder *b, Value *x, - Value *y) { - TensorType xType = x->getType().dyn_cast<TensorType>(); - TensorType yType = y->getType().dyn_cast<TensorType>(); +DenseIntElementsAttr getBroadcastDimensionsAttr(Builder *b, Value x, Value y) { + TensorType xType = x.getType().dyn_cast<TensorType>(); + TensorType yType = y.getType().dyn_cast<TensorType>(); if (xType == yType || !xType || !yType) return {}; // If the shapes have the same rank, then there is nothing to do. diff --git a/tensorflow/compiler/mlir/xla/ir/hlo_utils.h b/tensorflow/compiler/mlir/xla/ir/hlo_utils.h index 86c90b49f16..120b035e5d0 100644 --- a/tensorflow/compiler/mlir/xla/ir/hlo_utils.h +++ b/tensorflow/compiler/mlir/xla/ir/hlo_utils.h @@ -16,11 +16,11 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_MLIR_XLA_IR_HLO_UTILS_H_ #define TENSORFLOW_COMPILER_MLIR_XLA_IR_HLO_UTILS_H_ -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/TypeUtilities.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/PatternMatch.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/TypeUtilities.h" // TF:llvm-project #include "tensorflow/compiler/mlir/xla/convert_op_folder.h" namespace mlir { @@ -29,14 +29,14 @@ namespace xla { // Computes the broadcast dimensions attr for an elementwise binary operator // between two ranked tensors. mlir::DenseIntElementsAttr getBroadcastDimensionsAttr(mlir::Builder* b, - mlir::Value* x, - mlir::Value* y); + mlir::Value x, + mlir::Value y); /// Get a constant splat for the given value type.
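/// For example, a hypothetical call getSplat(&builder, val, 0) for a `val` of /// type tensor<4xf32> yields a dense<0.000000e+00> : tensor<4xf32> attribute.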
template <typename T> -static ElementsAttr getSplat(Builder* b, Value* val, T constant) { - auto valType = val->getType().cast<TensorType>(); - auto valElementType = getElementTypeOrSelf(val->getType()); +static ElementsAttr getSplat(Builder* b, Value val, T constant) { + auto valType = val.getType().cast<TensorType>(); + auto valElementType = getElementTypeOrSelf(val.getType()); // Handle integer elements. Attribute elementAttr; diff --git a/tensorflow/compiler/mlir/xla/ir/lhlo_ops.cc b/tensorflow/compiler/mlir/xla/ir/lhlo_ops.cc index c121aa703a3..0fbe5915fe8 100644 --- a/tensorflow/compiler/mlir/xla/ir/lhlo_ops.cc +++ b/tensorflow/compiler/mlir/xla/ir/lhlo_ops.cc @@ -28,20 +28,20 @@ limitations under the License. #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/FormatVariadic.h" -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Dialect.h" // TF:local_config_mlir -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/OpDefinition.h" // TF:local_config_mlir -#include "mlir/IR/OpImplementation.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/OperationSupport.h" // TF:local_config_mlir -#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/TypeUtilities.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir -#include "mlir/IR/Value.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Dialect.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/OpDefinition.h" // TF:llvm-project +#include "mlir/IR/OpImplementation.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/OperationSupport.h" // TF:llvm-project +#include "mlir/IR/PatternMatch.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/TypeUtilities.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project +#include "mlir/IR/Value.h" // TF:llvm-project #include "tensorflow/compiler/mlir/xla/ir/lhlo_ops.h.inc" namespace mlir { diff --git a/tensorflow/compiler/mlir/xla/ir/lhlo_ops.h b/tensorflow/compiler/mlir/xla/ir/lhlo_ops.h index f73e5026541..1a07b1a45f3 100644 --- a/tensorflow/compiler/mlir/xla/ir/lhlo_ops.h +++ b/tensorflow/compiler/mlir/xla/ir/lhlo_ops.h @@ -19,15 +19,15 @@ limitations under the License.
#define TENSORFLOW_COMPILER_MLIR_XLA_IR_LHLO_OPS_H_ #include "llvm/ADT/StringRef.h" -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Dialect.h" // TF:local_config_mlir -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/OpDefinition.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir -#include "mlir/Support/Functional.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Dialect.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/OpDefinition.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project +#include "mlir/Support/Functional.h" // TF:llvm-project namespace mlir { class OpBuilder; diff --git a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc index 5c351876440..c64b4ef9f4a 100644 --- a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc +++ b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc @@ -27,16 +27,16 @@ limitations under the License. #include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Matchers.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/TypeUtilities.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Matchers.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/TypeUtilities.h" // TF:llvm-project #include "tensorflow/compiler/mlir/xla/ir/hlo_ops.h" #include "tensorflow/compiler/mlir/xla/type_to_shape.h" #include "tensorflow/compiler/xla/client/lib/matrix.h" @@ -91,6 +91,8 @@ static double ConvertAPFloat(llvm::APFloat value) { return value.convertToDouble(); } +static inline bool Convertbool(bool value) { return value; } + static absl::string_view ConvertStringRef(mlir::StringRef value) { return {value.data(), value.size()}; } @@ -115,6 +117,15 @@ static std::vector Convert_broadcast_dimensions( return ConvertDenseIntAttr(*broadcast_dimensions); } +// Converts StringRef to xla FftType enum +static xla::FftType Convert_fft_type(llvm::StringRef fft_type_str) { + xla::FftType fft_type_enum; + // Illegal fft_type string would be caught by the verifier, so 'FftType_Parse' + // call below should never return false. 
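+ // For example, the strings "FFT", "IFFT", "RFFT" and "IRFFT" parse to the + // corresponding xla::FftType enum values.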
+ if (!FftType_Parse(fft_type_str, &fft_type_enum)) return xla::FftType::FFT; + return fft_type_enum; +} + // Convert a nx2 dense attribute to a list of tuples. This is the way padding // is defined in hlo. static std::vector> Convert_padding( @@ -151,10 +162,10 @@ static std::vector Convert_replica_groups( return result; } -#define I64_ELEMENTS_ATTR_TO_VECTOR(attribute) \ - static std::vector Convert_##attribute( \ - mlir::DenseIntElementsAttr attribute) { \ - return ConvertDenseIntAttr(attribute); \ +#define I64_ELEMENTS_ATTR_TO_VECTOR(attribute) \ + static std::vector Convert_##attribute( \ + llvm::Optional attribute) { \ + return ConvertDenseIntAttr(attribute); \ } I64_ELEMENTS_ATTR_TO_VECTOR(broadcast_sizes); @@ -163,6 +174,11 @@ I64_ELEMENTS_ATTR_TO_VECTOR(start_indices); I64_ELEMENTS_ATTR_TO_VECTOR(limit_indices); I64_ELEMENTS_ATTR_TO_VECTOR(strides); I64_ELEMENTS_ATTR_TO_VECTOR(slice_sizes); +I64_ELEMENTS_ATTR_TO_VECTOR(fft_length); +I64_ELEMENTS_ATTR_TO_VECTOR(dimensions); +I64_ELEMENTS_ATTR_TO_VECTOR(window_strides); +I64_ELEMENTS_ATTR_TO_VECTOR(lhs_dilation); +I64_ELEMENTS_ATTR_TO_VECTOR(rhs_dilation); #undef I64_ELEMENTS_ATTR_TO_VECTOR @@ -230,7 +246,7 @@ static xla::DotDimensionNumbers Convert_dot_dimension_numbers( return dot_dimension_numbers; } -static xla::ConvolutionDimensionNumbers Convert_convolution_dimension_numbers( +static xla::ConvolutionDimensionNumbers Convert_dimension_numbers( mlir::xla_hlo::ConvDimensionNumbers input) { xla::ConvolutionDimensionNumbers output; @@ -281,7 +297,7 @@ static xla::ComparisonDirection Convert_comparison_direction( .ValueOrDie(); } -static xla::GatherDimensionNumbers Convert_gather_dimension_numbers( +static xla::GatherDimensionNumbers Convert_dimension_numbers( mlir::xla_hlo::GatherDimensionNumbers input) { xla::GatherDimensionNumbers output; @@ -335,7 +351,7 @@ namespace mlir { namespace { class ConvertToHloModule { public: - using ValueLoweringMap = llvm::DenseMap; + using ValueLoweringMap = llvm::DenseMap; using FunctionLoweringMap = llvm::DenseMap; // If use_tuple_args is true, then the entry function's arguments are @@ -417,7 +433,7 @@ class ConvertToHloModule { namespace { struct OpLoweringContext { - llvm::DenseMap* values; + llvm::DenseMap* values; mlir::ConvertToHloModule* converter; xla::XlaBuilder* builder; }; @@ -425,7 +441,7 @@ struct OpLoweringContext { llvm::SmallVector GetTuple(mlir::Operation::operand_range values, OpLoweringContext ctx) { llvm::SmallVector ops; - for (mlir::Value* value : values) { + for (mlir::Value value : values) { ops.push_back((*ctx.values)[value]); } return ops; @@ -437,16 +453,6 @@ namespace mlir { namespace xla_hlo { namespace { -LogicalResult ExportXlaOp(AfterAllOp op, OpLoweringContext ctx) { - auto& value_map = *ctx.values; - std::vector tokens(op.operands().size()); - for (auto index_and_value : llvm::enumerate(op.operands())) { - tokens[index_and_value.index()] = value_map[index_and_value.value()]; - } - value_map[op] = xla::AfterAll(ctx.builder, tokens); - return mlir::success(); -} - LogicalResult ExportXlaOp(AllReduceOp op, OpLoweringContext ctx) { auto& value_map = *ctx.values; xla::XlaComputation computation; @@ -485,13 +491,6 @@ LogicalResult ExportXlaOp(BroadcastInDimOp op, OpLoweringContext ctx) { return success(); } -LogicalResult ExportXlaOp(ConcatenateOp op, OpLoweringContext ctx) { - auto& value_map = *ctx.values; - value_map[op] = xla::ConcatInDim(ctx.builder, GetTuple(op.val(), ctx), - op.dimension().getSExtValue()); - return success(); -} - LogicalResult 
ExportXlaOp(ConditionalOp op, OpLoweringContext ctx) { xla::XlaComputation true_branch; xla::XlaComputation false_branch; @@ -514,21 +513,6 @@ LogicalResult ExportXlaOp(ConstOp op, OpLoweringContext ctx) { return failure(); } -LogicalResult ExportXlaOp(ConvOp op, OpLoweringContext ctx) { - auto& value_map = *ctx.values; - value_map[op] = xla::ConvGeneralDilated( - value_map[op.lhs()], value_map[op.rhs()], - Convert_broadcast_dimensions(op.window_strides()), - Convert_padding(op.padding()), - Convert_broadcast_dimensions(op.lhs_dilation()), - Convert_broadcast_dimensions(op.rhs_dilation()), - Convert_convolution_dimension_numbers(op.dimension_numbers()), - op.feature_group_count().getSExtValue(), - op.batch_group_count().getSExtValue(), - Convert_precision_config(op.precision_config()).get()); - return success(); -} - LogicalResult ExportXlaOp(ConvertOp op, OpLoweringContext ctx) { auto& value_map = *ctx.values; value_map[op] = xla::ConvertElementType( @@ -537,19 +521,13 @@ LogicalResult ExportXlaOp(ConvertOp op, OpLoweringContext ctx) { return success(); } -LogicalResult ExportXlaOp(CopyOp op, OpLoweringContext ctx) { - return failure(); -} - -LogicalResult ExportXlaOp(FftOp op, OpLoweringContext ctx) { return failure(); } - -LogicalResult ExportXlaOp(GatherOp op, OpLoweringContext ctx) { +LogicalResult ExportXlaOp(InfeedOp op, OpLoweringContext ctx) { auto& value_map = *ctx.values; - xla::GatherDimensionNumbers dimension_numbers = - Convert_gather_dimension_numbers(op.dimension_numbers()); - value_map[op] = xla::Gather( - value_map[op.operand()], value_map[op.start_indices()], dimension_numbers, - Convert_slice_sizes(op.slice_sizes()), op.indices_are_sorted()); + // The shape argument expected by the xla client API is the type of the first + // element in the result tuple. 
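+ // For example, if the op's result type is tuple<tensor<3xi32>, !xla_hlo.token>, + // the shape passed to InfeedWithToken below is that of tensor<3xi32>.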
+ auto result_type = op.getType().cast().getType(0); + value_map[op] = xla::InfeedWithToken( + value_map[op.token()], xla::TypeToShape(result_type), op.infeed_config()); return success(); } @@ -560,6 +538,14 @@ LogicalResult ExportXlaOp(IotaOp op, OpLoweringContext ctx) { return success(); } +LogicalResult ExportXlaOp(OutfeedOp op, OpLoweringContext ctx) { + auto& value_map = *ctx.values; + value_map[op] = xla::OutfeedWithToken( + value_map[op.operand()], value_map[op.token()], + xla::TypeToShape(op.operand().getType()), op.outfeed_config()); + return success(); +} + LogicalResult ExportXlaOp(PadOp op, OpLoweringContext ctx) { auto& value_map = *ctx.values; xla::PaddingConfig padding_config; @@ -627,10 +613,10 @@ LogicalResult ExportXlaOp(ReturnOp op, OpLoweringContext ctx) { return failure(); } -LogicalResult ExportXlaOp(ReverseOp op, OpLoweringContext ctx) { +LogicalResult ExportXlaOp(RngNormalOp op, OpLoweringContext ctx) { auto& value_map = *ctx.values; - value_map[op] = xla::Rev(value_map[op.operand()], - Convert_broadcast_dimensions(op.dimensions())); + value_map[op] = xla::RngNormal(value_map[op.mu()], value_map[op.sigma()], + xla::TypeToShape(op.getType())); return success(); } @@ -674,6 +660,21 @@ LogicalResult ExportXlaOp(SelectAndScatterOp op, OpLoweringContext ctx) { return success(); } +LogicalResult ExportXlaOp(SendOp op, OpLoweringContext ctx) { + auto& value_map = *ctx.values; + if (op.is_host_transfer()) { + value_map[op] = + xla::SendToHost(value_map[op.operand()], value_map[op.token()], + xla::TypeToShape(op.operand().getType()), + Convert_channel_handle(op.channel_id())); + return success(); + } + value_map[op] = + xla::SendWithToken(value_map[op.operand()], value_map[op.token()], + Convert_channel_handle(op.channel_id())); + return success(); +} + LogicalResult ExportXlaOp(SliceOp op, OpLoweringContext ctx) { return failure(); } @@ -690,12 +691,6 @@ LogicalResult ExportXlaOp(SortOp op, OpLoweringContext ctx) { return success(); } -LogicalResult ExportXlaOp(TupleOp op, OpLoweringContext ctx) { - auto& value_map = *ctx.values; - value_map[op] = xla::Tuple(ctx.builder, GetTuple(op.val(), ctx)); - return success(); -} - LogicalResult ExportXlaOp(UnaryEinsumOp op, OpLoweringContext ctx) { // Intentional as UnaryEinsumOp is always lowered to the EinsumOp with two // operands. 
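// (A canonicalization pattern rewrites unary_einsum into a binary einsum, so // this export path is not expected to be exercised in practice.)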
@@ -888,7 +883,7 @@ LogicalResult ConvertToHloModule::LowerBasicBlockAsFunction( std::vector arg_shapes; arg_shapes.reserve(bb.getNumArguments()); for (auto& arg : bb.getArguments()) - arg_shapes.push_back(xla::TypeToShape(arg->getType())); + arg_shapes.push_back(xla::TypeToShape(arg.getType())); xla::Shape input_shape = xla::ShapeUtil::MakeTupleShape(arg_shapes); auto tuple = xla::Parameter(builder, 0, input_shape, "arg_tuple"); for (auto& it : llvm::enumerate(bb.getArguments())) { @@ -896,9 +891,9 @@ LogicalResult ConvertToHloModule::LowerBasicBlockAsFunction( } } else { for (auto& it : llvm::enumerate(bb.getArguments())) { - auto* arg = it.value(); + auto arg = it.value(); auto num = it.index(); - xla::Shape shape = xla::TypeToShape(arg->getType()); + xla::Shape shape = xla::TypeToShape(arg.getType()); lowering[arg] = xla::Parameter(builder, num, shape, absl::StrCat("Arg_", num)); } @@ -1029,7 +1024,7 @@ LogicalResult AddDynamicParameterBindings(mlir::ModuleOp module, llvm::SmallDenseSet used_shape_indices; auto arg_type = - entry_func.getArgument(i)->getType().dyn_cast(); + entry_func.getArgument(i).getType().dyn_cast(); for (auto shape_and_padding : llvm::enumerate(llvm::zip( shape_indices.getValue(), padding_arg_indices.getValue()))) { const int element_index = shape_and_padding.index(); @@ -1064,7 +1059,7 @@ LogicalResult AddDynamicParameterBindings(mlir::ModuleOp module, kPaddingArgIndicesAttr, i, element_index, e, padding_arg_index)); Type padding_arg_type = - entry_func.getArgument(padding_arg_index)->getType(); + entry_func.getArgument(padding_arg_index).getType(); if (auto tensor_type = padding_arg_type.dyn_cast()) if (tensor_type.getRank() != 0) return entry_func.emitError() diff --git a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.h b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.h index 3dffe2bc461..6f91213b31a 100644 --- a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.h +++ b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.h @@ -16,7 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_MLIR_XLA_MLIR_HLO_TO_HLO_H_ #define TENSORFLOW_COMPILER_MLIR_XLA_MLIR_HLO_TO_HLO_H_ -#include "mlir/IR/Module.h" // TF:local_config_mlir +#include "mlir/IR/Module.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/utils/error_util.h" #include "tensorflow/compiler/xla/client/xla_builder.h" #include "tensorflow/compiler/xla/service/hlo_module.h" @@ -37,7 +37,7 @@ Status ConvertMlirHloToHlo(mlir::ModuleOp module, xla::HloProto* hlo_proto, // from `value_lowering` map. llvm::Optional CreateXlaOperator( mlir::Operation* op, - llvm::DenseMap* value_lowering); + llvm::DenseMap* value_lowering); } // namespace mlir diff --git a/tensorflow/compiler/mlir/xla/operator_writer_gen.cc b/tensorflow/compiler/mlir/xla/operator_writer_gen.cc index acc3c17baf5..9a578c83ce6 100644 --- a/tensorflow/compiler/mlir/xla/operator_writer_gen.cc +++ b/tensorflow/compiler/mlir/xla/operator_writer_gen.cc @@ -17,6 +17,7 @@ limitations under the License. #include "llvm/ADT/Sequence.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringMap.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/InitLLVM.h" @@ -25,8 +26,8 @@ limitations under the License. 
#include "llvm/TableGen/Main.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/TableGenBackend.h" -#include "mlir/Support/STLExtras.h" // TF:local_config_mlir -#include "mlir/TableGen/Operator.h" // TF:local_config_mlir +#include "mlir/Support/STLExtras.h" // TF:llvm-project +#include "mlir/TableGen/Operator.h" // TF:llvm-project using llvm::raw_ostream; using llvm::RecordKeeper; @@ -42,14 +43,31 @@ static std::string GetDefaultAttrExport( Attribute attr = named_attr.attr; StringRef storage_type = attr.getStorageType(); // For some attribute types we have a general conversion, so use that. - if (!attr.isEnumAttr() && (storage_type.endswith("IntegerAttr") || + if (!attr.isEnumAttr() && (storage_type.endswith("BoolAttr") || storage_type.endswith("FloatAttr") || + storage_type.endswith("IntegerAttr") || storage_type.endswith("StringAttr"))) { return "Convert" + attr.getReturnType().str(); } return "Convert_" + named_attr.name.str(); } +static std::string GetClientBuilder(const Operator& op) { + static const auto* kOpToXLABuilderMap = + new llvm::StringMap{{"ReverseOp", "Rev"}, + {"ConcatenateOp", "ConcatInDim"}, + {"ConvOp", "ConvGeneralDilated"}}; + + StringRef op_name = op.getCppClassName(); + + // Default case where the client builder method names closely follow the op + // names in the dialect. For e.g., AddOp -> xla::Add method. + if (!kOpToXLABuilderMap->count(op_name)) return op_name.drop_back(2); + + // Otherwise, if the op to client builder method mapping is provided. + return kOpToXLABuilderMap->lookup(op_name); +} + static void BuildOperator(const Operator& op, raw_ostream* output) { auto& os = *output; os << " auto& value_map = *lowering_context.values;\n" @@ -71,7 +89,7 @@ static void BuildOperator(const Operator& op, raw_ostream* output) { } // Otherwise, this is a varidiac operand list. - os << " std::vector xla_arg_" << index << ";" + os << " std::vector xla_arg_" << index << ";\n" << " for (auto operand : xla_op.getODSOperands(" << operand_number++ << "))\n xla_arg_" << index << ".push_back(value_map[operand]);\n"; @@ -85,10 +103,15 @@ static void BuildOperator(const Operator& op, raw_ostream* output) { << op.getArgName(index) << "());\n"; } - // Assumes that the client builder method names closely follow the op names - // in the dialect. For e.g., AddOp -> xla::Add method. - StringRef op_name = op.getCppClassName(); - os << " auto xla_result = xla::" << op_name.drop_back(2) << "("; + // Emit call to client API + os << " auto xla_result = xla::" << GetClientBuilder(op) << "("; + + // If all operands are variadic, then pass the builder explicitly to xla + // client API call + if (op.getNumOperands() == op.getNumVariadicOperands()) { + os << "lowering_context.builder"; + if (op.getNumArgs() != 0) os << ", "; + } // Emit each of the arguments. 
interleaveComma(llvm::seq(0, op.getNumArgs()), os, diff --git a/tensorflow/compiler/mlir/xla/tests/BUILD b/tensorflow/compiler/mlir/xla/tests/BUILD index 9f47185e90a..4faa8d2efe8 100644 --- a/tensorflow/compiler/mlir/xla/tests/BUILD +++ b/tensorflow/compiler/mlir/xla/tests/BUILD @@ -4,7 +4,7 @@ package(licenses = ["notice"]) glob_lit_tests( data = [":test_utilities"], - driver = "@local_config_mlir//:run_lit.sh", + driver = "@llvm-project//mlir:run_lit.sh", test_file_exts = ["mlir"], ) @@ -14,6 +14,6 @@ filegroup( testonly = True, data = [ "//tensorflow/compiler/mlir:tf-opt", - "@llvm//:FileCheck", + "@llvm-project//llvm:FileCheck", ], ) diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir index c4b0e9f9d14..7e743cacb2b 100644 --- a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir @@ -6,7 +6,7 @@ // CHECK-LABEL: fusedBatchNorm_notraining func @fusedBatchNorm_notraining(%arg0: tensor<8x8x8x8xf32>, %arg1: tensor<8xf32>, %arg2: tensor<8xf32>, %arg3: tensor<8xf32>, %arg4: tensor<8xf32>) -> (tensor<8x8x8x8xf32>) { - // CHECK-NEXT: "xla_hlo.batch_norm_inference"(%arg0, %arg1, %arg2, %arg3, %arg4) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) -> tensor<8x8x8x8xf32> + // CHECK: "xla_hlo.batch_norm_inference"(%arg0, %arg1, %arg2, %arg3, %arg4) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) -> tensor<8x8x8x8xf32> %0:5 = "tf.FusedBatchNorm"(%arg0, %arg1, %arg2, %arg3, %arg4) {T = "tfdtype$DT_FLOAT", data_format = "NHWC", epsilon = 0.001 : f32, is_training = false} : (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) -> (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) return %0#0 : tensor<8x8x8x8xf32> } @@ -14,11 +14,332 @@ func @fusedBatchNorm_notraining(%arg0: tensor<8x8x8x8xf32>, %arg1: tensor<8xf32> // CHECK-LABEL: fusedBatchNorm_training func @fusedBatchNorm_training(%arg0: tensor<8x8x8x8xf32>, %arg1: tensor<8xf32>, %arg2: tensor<8xf32>, %arg3: tensor<8xf32>, %arg4: tensor<8xf32>) -> (tensor<8x8x8x8xf32>) { // TODO(riverriddle) Support training. 
- // CHECK-NEXT: "tf.FusedBatchNorm" + // CHECK: "tf.FusedBatchNorm" %0:5 = "tf.FusedBatchNorm"(%arg0, %arg1, %arg2, %arg3, %arg4) {T = "tfdtype$DT_FLOAT", data_format = "NHWC", epsilon = 0.001 : f32, is_training = true} : (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) -> (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) return %0#0 : tensor<8x8x8x8xf32> } +// CHECK-LABEL: fusedBatchNormV3_noTraining +func @fusedBatchNormV3_noTraining(%arg0: tensor<8x8x8x8xf32>, %arg1: tensor<8xf32>, %arg2: tensor<8xf32>, %arg3: tensor<8xf32>, %arg4: tensor<8xf32>) -> (tensor<8x8x8x8xf32>) { + // CHECK: "xla_hlo.batch_norm_inference"({{.*}}, %arg1, %arg2, %arg3, %arg4) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) -> tensor<8x8x8x8xf32> + %0:6 = "tf.FusedBatchNormV3"(%arg0, %arg1, %arg2, %arg3, %arg4) {T = "tfdtype$DT_FLOAT", data_format = "NHWC", epsilon = 0.001 : f32, is_training = false} : (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) -> (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) + return %0#0 : tensor<8x8x8x8xf32> +} + +//CHECK-LABEL: fusedBatchNormV3_noTraining_mixedPrecision +func @fusedBatchNormV3_noTraining_mixedPrecision(%arg0: tensor<8x8x8x8xbf16>, %arg1: tensor<8xf32>, %arg2: tensor<8xf32>, %arg3: tensor<8xf32>, %arg4: tensor<8xf32>) -> (tensor<8x8x8x8xbf16>) { + // CHECK: %[[RESULT0:.*]] = "xla_hlo.convert"(%arg0) : (tensor<8x8x8x8xbf16>) -> tensor<8x8x8x8xf32> + // CHECK: %[[RESULT1:.*]] = "xla_hlo.batch_norm_inference"(%[[RESULT0]], %arg1, %arg2, %arg3, %arg4) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) -> tensor<8x8x8x8xf32> + %0:6 = "tf.FusedBatchNormV3"(%arg0, %arg1, %arg2, %arg3, %arg4) {T = "tfdtype$DT_FLOAT", data_format = "NHWC", epsilon = 0.001 : f32, is_training = false} : (tensor<8x8x8x8xbf16>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) -> (tensor<8x8x8x8xbf16>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) + // CHECK-NEXT: "xla_hlo.convert"(%[[RESULT1]]) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xbf16> + return %0#0 : tensor<8x8x8x8xbf16> +} + +//CHECK-LABEL: fusedBatchNormV3_training +func @fusedBatchNormV3_training(%arg0: tensor<8x8x8x8xf32>, %arg1: tensor<8xf32>, %arg2: tensor<8xf32>, %arg3: tensor<8xf32>, %arg4: tensor<8xf32>) -> (tensor<8x8x8x8xf32>) { + // CHECK: %[[RESULT0:.*]] = "xla_hlo.batch_norm_training"({{.*}}, %arg1, %arg2) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>) -> tuple, tensor<8xf32>, tensor<8xf32>> + %0:6 = "tf.FusedBatchNormV3"(%arg0, %arg1, %arg2, %arg3, %arg4) {T = "tfdtype$DT_FLOAT", data_format = "NHWC", epsilon = 0.001 : f32, is_training = true} : (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) -> (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) + // CHECK: "xla_hlo.get_tuple_element"(%[[RESULT0]]) {index = 0 : i32} : (tuple, tensor<8xf32>, tensor<8xf32>>) -> tensor<8x8x8x8xf32> + // CHECK: "xla_hlo.get_tuple_element"(%[[RESULT0]]) {index = 1 : i32} : (tuple, tensor<8xf32>, tensor<8xf32>>) -> tensor<8xf32> + // CHECK: %[[VAR:.*]] = "xla_hlo.get_tuple_element"(%[[RESULT0]]) {index = 2 : i32} : (tuple, tensor<8xf32>, 
tensor<8xf32>>) -> tensor<8xf32> + // CHECK: xla_hlo.constant + // CHECK: "xla_hlo.mul"(%[[VAR]], {{.*}}) : (tensor<8xf32>, tensor) -> tensor<8xf32> + return %0#0 : tensor<8x8x8x8xf32> +} + +//CHECK-LABEL: fusedBatchNormV3_training_mixedPrecision +func @fusedBatchNormV3_training_mixedPrecision(%arg0: tensor<8x8x8x8xbf16>, %arg1: tensor<8xf32>, %arg2: tensor<8xf32>, %arg3: tensor<8xf32>, %arg4: tensor<8xf32>) -> (tensor<8x8x8x8xbf16>) { + // CHECK: "xla_hlo.convert"(%arg0) : (tensor<8x8x8x8xbf16>) -> tensor<8x8x8x8xf32> + %0:6 = "tf.FusedBatchNormV3"(%arg0, %arg1, %arg2, %arg3, %arg4) {T = "tfdtype$DT_FLOAT", data_format = "NHWC", epsilon = 0.001 : f32, is_training = true} : (tensor<8x8x8x8xbf16>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) -> (tensor<8x8x8x8xbf16>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) + // CHECK: "xla_hlo.convert"({{.*}}) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xbf16> + return %0#0 : tensor<8x8x8x8xbf16> +} + +//CHECK-LABEL: fusedBatchNormV3_NCHW +func @fusedBatchNormV3_NCHW(%arg0: tensor<8x8x8x8xf32>, %arg1: tensor<8xf32>, %arg2: tensor<8xf32>, %arg3: tensor<8xf32>, %arg4: tensor<8xf32>) -> (tensor<8x8x8x8xf32>) { + // CHECK: "xla_hlo.batch_norm_training"({{.*}}, %arg1, %arg2) {epsilon = 1.000000e-03 : f32, feature_index = 1 : i64} : (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>) -> tuple, tensor<8xf32>, tensor<8xf32>> + %0:6 = "tf.FusedBatchNormV3"(%arg0, %arg1, %arg2, %arg3, %arg4) {T = "tfdtype$DT_FLOAT", data_format = "NCHW", epsilon = 0.001 : f32, is_training = true} : (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) -> (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) + return %0#0 : tensor<8x8x8x8xf32> +} + +// CHECK-LABEL: fusedBatchNormGrad_noTraining +func @fusedBatchNormGrad_noTraining(%arg0: tensor<8x8x8x8xf32>, %arg1: tensor<8x8x8x8xf32>, %arg2: tensor<8xf32>, %arg3: tensor<8xf32>, %arg4: tensor<8xf32>) -> (tensor<8x8x8x8xf32>) { + // CHECK-NEXT: %[[grad:.*]] = "xla_hlo.convert"(%arg0) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[act:.*]] = "xla_hlo.convert"(%arg1) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[eps:.*]] = xla_hlo.constant dense<1.000000e-03> : tensor + + // CHECK-NEXT: %[[add:.*]] = "xla_hlo.add"(%arg4, %[[eps]]) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<8xf32>, tensor) -> tensor<8xf32> + // CHECK-NEXT: %[[scr1:.*]] = "xla_hlo.rsqrt"(%[[add]]) : (tensor<8xf32>) -> tensor<8xf32> + + // CHECK-NEXT: %[[sub:.*]] = "xla_hlo.sub"(%[[act]], %arg3) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<8x8x8x8xf32>, tensor<8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[mul:.*]] = xla_hlo.mul %[[grad]], %[[sub]] {broadcast_dimensions = dense<[]> : tensor<0xi64>} : tensor<8x8x8x8xf32> + // CHECK-NEXT: xla_hlo.constant dense<[0, 1, 2]> : tensor<3xi64> + // CHECK-NEXT: %[[cmul:.*]] = "xla_hlo.convert"(%[[mul]]) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[init:.*]] = xla_hlo.constant dense<0.000000e+00> : tensor + // CHECK-NEXT: %[[red1:.*]] = "xla_hlo.reduce"(%[[cmul]], %[[init]]) ( { + // CHECK-NEXT: ^bb0(%arg5: tensor, %arg6: tensor): // no predecessors + // CHECK-NEXT: %[[reduced:.*]] = xla_hlo.add %arg5, %arg6 : tensor + // CHECK-NEXT: "xla_hlo.return"(%[[reduced]]) : (tensor) -> () + // CHECK-NEXT: }) {dimensions = dense<[0, 1, 2]> : tensor<3xi64>} : (tensor<8x8x8x8xf32>, tensor) -> tensor<8xf32> + // CHECK-NEXT: 
%[[scr2:.*]] = "xla_hlo.convert"(%[[red1]]) : (tensor<8xf32>) -> tensor<8xf32> + + // CHECK-NEXT: %[[mul2:.*]] = xla_hlo.mul %arg2, %[[scr1]] {broadcast_dimensions = dense<[]> : tensor<0xi64>} : tensor<8xf32> + // CHECK-NEXT: %[[mul3:.*]] = "xla_hlo.mul"(%[[grad]], %[[mul2]]) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<8x8x8x8xf32>, tensor<8xf32>) -> tensor<8x8x8x8xf32> + + // CHECK-NEXT: %[[scale_backprop:.*]] = xla_hlo.mul %[[scr1]], %[[scr2]] {broadcast_dimensions = dense<[]> : tensor<0xi64>} : tensor<8xf32> + + // CHECK-NEXT: xla_hlo.constant dense<[0, 1, 2]> : tensor<3xi64> + // CHECK-NEXT: %[[cgrad:.*]] = "xla_hlo.convert"(%[[grad]]) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[init2:.*]] = xla_hlo.constant dense<0.000000e+00> : tensor + // CHECK-NEXT: %[[red2:.*]] = "xla_hlo.reduce"(%[[cgrad]], %[[init2]]) ( { + // CHECK-NEXT: ^bb0(%arg5: tensor, %arg6: tensor): // no predecessors + // CHECK-NEXT: %[[reduced1:.*]] = xla_hlo.add %arg5, %arg6 : tensor + // CHECK-NEXT: "xla_hlo.return"(%[[reduced1]]) : (tensor) -> () + // CHECK-NEXT: }) {dimensions = dense<[0, 1, 2]> : tensor<3xi64>} : (tensor<8x8x8x8xf32>, tensor) -> tensor<8xf32> + // CHECK-NEXT: %[[offset_backprop:.*]] = "xla_hlo.convert"(%[[red2]]) : (tensor<8xf32>) -> tensor<8xf32> + + // CHECK-NEXT: %[[x_backprop:.*]] = "xla_hlo.convert"(%[[mul3]]) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: return %[[x_backprop]] : tensor<8x8x8x8xf32> + + %0:5 = "tf.FusedBatchNormGrad"(%arg0, %arg1, %arg2, %arg3, %arg4) {T = "tfdtype$DT_FLOAT", data_format = "NHWC", epsilon = 0.001 : f32, is_training = false} : (tensor<8x8x8x8xf32>, tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) -> (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) + return %0#0 : tensor<8x8x8x8xf32> +} + +// CHECK-LABEL: fusedBatchNormGrad_Training +func @fusedBatchNormGrad_Training(%arg0: tensor<8x8x8x8xf32>, %arg1: tensor<8x8x8x8xf32>, %arg2: tensor<8xf32>, %arg3: tensor<8xf32>, %arg4: tensor<8xf32>) -> (tensor<8x8x8x8xf32>) { + // CHECK-NEXT: %[[grad:.*]] = "xla_hlo.convert"(%arg0) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[act:.*]] = "xla_hlo.convert"(%arg1) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[training:.*]] = "xla_hlo.batch_norm_grad"(%[[act]], %arg2, %arg3, %arg4, %[[grad]]) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8x8x8x8xf32>) -> tuple, tensor<8xf32>, tensor<8xf32>> + // CHECK-NEXT: %[[tact:.*]] = "xla_hlo.get_tuple_element"(%[[training]]) {index = 0 : i32} : (tuple, tensor<8xf32>, tensor<8xf32>>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[scale_backprop:.*]] = "xla_hlo.get_tuple_element"(%[[training]]) {index = 1 : i32} : (tuple, tensor<8xf32>, tensor<8xf32>>) -> tensor<8xf32> + // CHECK-NEXT: %[[offset_backprop:.*]] = "xla_hlo.get_tuple_element"(%[[training]]) {index = 2 : i32} : (tuple, tensor<8xf32>, tensor<8xf32>>) -> tensor<8xf32> + // CHECK-NEXT: %[[x_backprop:.*]] = "xla_hlo.convert"(%[[tact]]) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: return %[[x_backprop]] : tensor<8x8x8x8xf32> + + %0:5 = "tf.FusedBatchNormGrad"(%arg0, %arg1, %arg2, %arg3, %arg4) {T = "tfdtype$DT_FLOAT", data_format = "NHWC", epsilon = 0.001 : f32, is_training = true} : (tensor<8x8x8x8xf32>, tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) -> (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, 
tensor<8xf32>, tensor<8xf32>) + return %0#0 : tensor<8x8x8x8xf32> +} + +// CHECK-LABEL: fusedBatchNormGradV2_noTraining +func @fusedBatchNormGradV2_noTraining(%arg0: tensor<8x8x8x8xf32>, %arg1: tensor<8x8x8x8xf32>, %arg2: tensor<8xf32>, %arg3: tensor<8xf32>, %arg4: tensor<8xf32>) -> (tensor<8x8x8x8xf32>) { + // CHECK-NEXT: %[[grad:.*]] = "xla_hlo.convert"(%arg0) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[act:.*]] = "xla_hlo.convert"(%arg1) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[eps:.*]] = xla_hlo.constant dense<1.000000e-03> : tensor + + // CHECK-NEXT: %[[add:.*]] = "xla_hlo.add"(%arg4, %[[eps]]) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<8xf32>, tensor) -> tensor<8xf32> + // CHECK-NEXT: %[[scr1:.*]] = "xla_hlo.rsqrt"(%[[add]]) : (tensor<8xf32>) -> tensor<8xf32> + + // CHECK-NEXT: %[[sub:.*]] = "xla_hlo.sub"(%[[act]], %arg3) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<8x8x8x8xf32>, tensor<8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[mul:.*]] = xla_hlo.mul %[[grad]], %[[sub]] {broadcast_dimensions = dense<[]> : tensor<0xi64>} : tensor<8x8x8x8xf32> + // CHECK-NEXT: xla_hlo.constant dense<[0, 1, 2]> : tensor<3xi64> + // CHECK-NEXT: %[[cmul:.*]] = "xla_hlo.convert"(%[[mul]]) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[init:.*]] = xla_hlo.constant dense<0.000000e+00> : tensor + // CHECK-NEXT: %[[red1:.*]] = "xla_hlo.reduce"(%[[cmul]], %[[init]]) ( { + // CHECK-NEXT: ^bb0(%arg5: tensor, %arg6: tensor): // no predecessors + // CHECK-NEXT: %[[reduced:.*]] = xla_hlo.add %arg5, %arg6 : tensor + // CHECK-NEXT: "xla_hlo.return"(%[[reduced]]) : (tensor) -> () + // CHECK-NEXT: }) {dimensions = dense<[0, 1, 2]> : tensor<3xi64>} : (tensor<8x8x8x8xf32>, tensor) -> tensor<8xf32> + // CHECK-NEXT: %[[scr2:.*]] = "xla_hlo.convert"(%[[red1]]) : (tensor<8xf32>) -> tensor<8xf32> + + // CHECK-NEXT: %[[mul2:.*]] = xla_hlo.mul %arg2, %[[scr1]] {broadcast_dimensions = dense<[]> : tensor<0xi64>} : tensor<8xf32> + // CHECK-NEXT: %[[mul3:.*]] = "xla_hlo.mul"(%[[grad]], %[[mul2]]) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<8x8x8x8xf32>, tensor<8xf32>) -> tensor<8x8x8x8xf32> + + // CHECK-NEXT: %[[scale_backprop:.*]] = xla_hlo.mul %[[scr1]], %[[scr2]] {broadcast_dimensions = dense<[]> : tensor<0xi64>} : tensor<8xf32> + + // CHECK-NEXT: xla_hlo.constant dense<[0, 1, 2]> : tensor<3xi64> + // CHECK-NEXT: %[[cgrad:.*]] = "xla_hlo.convert"(%[[grad]]) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[init2:.*]] = xla_hlo.constant dense<0.000000e+00> : tensor + // CHECK-NEXT: %[[red2:.*]] = "xla_hlo.reduce"(%[[cgrad]], %[[init2]]) ( { + // CHECK-NEXT: ^bb0(%arg5: tensor, %arg6: tensor): // no predecessors + // CHECK-NEXT: %[[reduced1:.*]] = xla_hlo.add %arg5, %arg6 : tensor + // CHECK-NEXT: "xla_hlo.return"(%[[reduced1]]) : (tensor) -> () + // CHECK-NEXT: }) {dimensions = dense<[0, 1, 2]> : tensor<3xi64>} : (tensor<8x8x8x8xf32>, tensor) -> tensor<8xf32> + // CHECK-NEXT: %[[offset_backprop:.*]] = "xla_hlo.convert"(%[[red2]]) : (tensor<8xf32>) -> tensor<8xf32> + + // CHECK-NEXT: %[[x_backprop:.*]] = "xla_hlo.convert"(%[[mul3]]) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: return %[[x_backprop]] : tensor<8x8x8x8xf32> + + %0:5 = "tf.FusedBatchNormGradV2"(%arg0, %arg1, %arg2, %arg3, %arg4) {T = "tfdtype$DT_FLOAT", data_format = "NHWC", epsilon = 0.001 : f32, is_training = false} : (tensor<8x8x8x8xf32>, tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) -> 
(tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) + return %0#0 : tensor<8x8x8x8xf32> +} + +// CHECK-LABEL: fusedBatchNormGradV2_Training +func @fusedBatchNormGradV2_Training(%arg0: tensor<8x8x8x8xf32>, %arg1: tensor<8x8x8x8xf32>, %arg2: tensor<8xf32>, %arg3: tensor<8xf32>, %arg4: tensor<8xf32>) -> (tensor<8x8x8x8xf32>) { + // CHECK-NEXT: %[[grad:.*]] = "xla_hlo.convert"(%arg0) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[act:.*]] = "xla_hlo.convert"(%arg1) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[training:.*]] = "xla_hlo.batch_norm_grad"(%[[act]], %arg2, %arg3, %arg4, %[[grad]]) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8x8x8x8xf32>) -> tuple, tensor<8xf32>, tensor<8xf32>> + // CHECK-NEXT: %[[tact:.*]] = "xla_hlo.get_tuple_element"(%[[training]]) {index = 0 : i32} : (tuple, tensor<8xf32>, tensor<8xf32>>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[scale_backprop:.*]] = "xla_hlo.get_tuple_element"(%[[training]]) {index = 1 : i32} : (tuple, tensor<8xf32>, tensor<8xf32>>) -> tensor<8xf32> + // CHECK-NEXT: %[[offset_backprop:.*]] = "xla_hlo.get_tuple_element"(%[[training]]) {index = 2 : i32} : (tuple, tensor<8xf32>, tensor<8xf32>>) -> tensor<8xf32> + // CHECK-NEXT: %[[x_backprop:.*]] = "xla_hlo.convert"(%[[tact]]) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: return %[[x_backprop]] : tensor<8x8x8x8xf32> + + %0:5 = "tf.FusedBatchNormGradV2"(%arg0, %arg1, %arg2, %arg3, %arg4) {T = "tfdtype$DT_FLOAT", data_format = "NHWC", epsilon = 0.001 : f32, is_training = true} : (tensor<8x8x8x8xf32>, tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) -> (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) + return %0#0 : tensor<8x8x8x8xf32> +} + +// CHECK-LABEL: fusedBatchNormGradV2_noTraining_mixed_precision +func @fusedBatchNormGradV2_noTraining_mixed_precision(%arg0: tensor<8x8x8x8xf32>, %arg1: tensor<8x8x8x8xbf16>, %arg2: tensor<8xf32>, %arg3: tensor<8xf32>, %arg4: tensor<8xf32>) -> (tensor<8x8x8x8xbf16>) { + // CHECK-NEXT: %[[grad:.*]] = "xla_hlo.convert"(%arg0) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[act:.*]] = "xla_hlo.convert"(%arg1) : (tensor<8x8x8x8xbf16>) -> tensor<8x8x8x8xf32> + + // CHECK: %[[x_backprop:.*]] = "xla_hlo.convert"({{.*}}) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xbf16> + // CHECK-NEXT: return %[[x_backprop]] : tensor<8x8x8x8xbf16> + + %0:5 = "tf.FusedBatchNormGradV2"(%arg0, %arg1, %arg2, %arg3, %arg4) {T = "tfdtype$DT_FLOAT", data_format = "NHWC", epsilon = 0.001 : f32, is_training = false} : (tensor<8x8x8x8xf32>, tensor<8x8x8x8xbf16>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) -> (tensor<8x8x8x8xbf16>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) + return %0#0 : tensor<8x8x8x8xbf16> +} + +// CHECK-LABEL: fusedBatchNormGradV2_Training_mixed_precision +func @fusedBatchNormGradV2_Training_mixed_precision(%arg0: tensor<8x8x8x8xf32>, %arg1: tensor<8x8x8x8xbf16>, %arg2: tensor<8xf32>, %arg3: tensor<8xf32>, %arg4: tensor<8xf32>) -> (tensor<8x8x8x8xbf16>) { + // CHECK-NEXT: %[[grad:.*]] = "xla_hlo.convert"(%arg0) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[act:.*]] = "xla_hlo.convert"(%arg1) : (tensor<8x8x8x8xbf16>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[training:.*]] = "xla_hlo.batch_norm_grad"(%[[act]], %arg2, %arg3, %arg4, %[[grad]]) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} 
: (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8x8x8x8xf32>) -> tuple, tensor<8xf32>, tensor<8xf32>> + // CHECK-NEXT: %[[tact:.*]] = "xla_hlo.get_tuple_element"(%[[training]]) {index = 0 : i32} : (tuple, tensor<8xf32>, tensor<8xf32>>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[scale_backprop:.*]] = "xla_hlo.get_tuple_element"(%[[training]]) {index = 1 : i32} : (tuple, tensor<8xf32>, tensor<8xf32>>) -> tensor<8xf32> + // CHECK-NEXT: %[[offset_backprop:.*]] = "xla_hlo.get_tuple_element"(%[[training]]) {index = 2 : i32} : (tuple, tensor<8xf32>, tensor<8xf32>>) -> tensor<8xf32> + // CHECK-NEXT: %[[x_backprop:.*]] = "xla_hlo.convert"(%[[tact]]) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xbf16> + // CHECK-NEXT: return %[[x_backprop]] : tensor<8x8x8x8xbf16> + + %0:5 = "tf.FusedBatchNormGradV2"(%arg0, %arg1, %arg2, %arg3, %arg4) {T = "tfdtype$DT_FLOAT", data_format = "NHWC", epsilon = 0.001 : f32, is_training = true} : (tensor<8x8x8x8xf32>, tensor<8x8x8x8xbf16>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) -> (tensor<8x8x8x8xbf16>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) + return %0#0 : tensor<8x8x8x8xbf16> +} + +// CHECK-LABEL: fusedBatchNormGradV3_noTraining +func @fusedBatchNormGradV3_noTraining(%arg0: tensor<8x8x8x8xf32>, %arg1: tensor<8x8x8x8xf32>, %arg2: tensor<8xf32>, %arg3: tensor<8xf32>, %arg4: tensor<8xf32>, %arg5: tensor<8xf32>) -> (tensor<8x8x8x8xf32>) { + // CHECK-NEXT: %[[grad:.*]] = "xla_hlo.convert"(%arg0) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[act:.*]] = "xla_hlo.convert"(%arg1) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[eps:.*]] = xla_hlo.constant dense<1.000000e-03> : tensor + + // CHECK-NEXT: %[[add:.*]] = "xla_hlo.add"(%arg4, %[[eps]]) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<8xf32>, tensor) -> tensor<8xf32> + // CHECK-NEXT: %[[scr1:.*]] = "xla_hlo.rsqrt"(%[[add]]) : (tensor<8xf32>) -> tensor<8xf32> + + // CHECK-NEXT: %[[sub:.*]] = "xla_hlo.sub"(%[[act]], %arg3) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<8x8x8x8xf32>, tensor<8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[mul:.*]] = xla_hlo.mul %[[grad]], %[[sub]] {broadcast_dimensions = dense<[]> : tensor<0xi64>} : tensor<8x8x8x8xf32> + // CHECK-NEXT: xla_hlo.constant dense<[0, 1, 2]> : tensor<3xi64> + // CHECK-NEXT: %[[cmul:.*]] = "xla_hlo.convert"(%[[mul]]) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[init:.*]] = xla_hlo.constant dense<0.000000e+00> : tensor + // CHECK-NEXT: %[[red1:.*]] = "xla_hlo.reduce"(%[[cmul]], %[[init]]) ( { + // CHECK-NEXT: ^bb0(%arg6: tensor, %arg7: tensor): // no predecessors + // CHECK-NEXT: %[[reduced:.*]] = xla_hlo.add %arg6, %arg7 : tensor + // CHECK-NEXT: "xla_hlo.return"(%[[reduced]]) : (tensor) -> () + // CHECK-NEXT: }) {dimensions = dense<[0, 1, 2]> : tensor<3xi64>} : (tensor<8x8x8x8xf32>, tensor) -> tensor<8xf32> + // CHECK-NEXT: %[[scr2:.*]] = "xla_hlo.convert"(%[[red1]]) : (tensor<8xf32>) -> tensor<8xf32> + + // CHECK-NEXT: %[[mul2:.*]] = xla_hlo.mul %arg2, %[[scr1]] {broadcast_dimensions = dense<[]> : tensor<0xi64>} : tensor<8xf32> + // CHECK-NEXT: %[[mul3:.*]] = "xla_hlo.mul"(%[[grad]], %[[mul2]]) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<8x8x8x8xf32>, tensor<8xf32>) -> tensor<8x8x8x8xf32> + + // CHECK-NEXT: %[[scale_backprop:.*]] = xla_hlo.mul %[[scr1]], %[[scr2]] {broadcast_dimensions = dense<[]> : tensor<0xi64>} : tensor<8xf32> + + // CHECK-NEXT: xla_hlo.constant dense<[0, 1, 2]> : tensor<3xi64> + // 
CHECK-NEXT: %[[cgrad:.*]] = "xla_hlo.convert"(%[[grad]]) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[init2:.*]] = xla_hlo.constant dense<0.000000e+00> : tensor + // CHECK-NEXT: %[[red2:.*]] = "xla_hlo.reduce"(%[[cgrad]], %[[init2]]) ( { + // CHECK-NEXT: ^bb0(%arg6: tensor, %arg7: tensor): // no predecessors + // CHECK-NEXT: %[[reduced1:.*]] = xla_hlo.add %arg6, %arg7 : tensor + // CHECK-NEXT: "xla_hlo.return"(%[[reduced1]]) : (tensor) -> () + // CHECK-NEXT: }) {dimensions = dense<[0, 1, 2]> : tensor<3xi64>} : (tensor<8x8x8x8xf32>, tensor) -> tensor<8xf32> + // CHECK-NEXT: %[[offset_backprop:.*]] = "xla_hlo.convert"(%[[red2]]) : (tensor<8xf32>) -> tensor<8xf32> + + // CHECK-NEXT: %[[x_backprop:.*]] = "xla_hlo.convert"(%[[mul3]]) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: return %[[x_backprop]] : tensor<8x8x8x8xf32> + + %0:5 = "tf.FusedBatchNormGradV3"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5) {T = "tfdtype$DT_FLOAT", data_format = "NHWC", epsilon = 0.001 : f32, is_training = false} : (tensor<8x8x8x8xf32>, tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) -> (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) + return %0#0 : tensor<8x8x8x8xf32> +} + +// CHECK-LABEL: fusedBatchNormGradV3_Training +func @fusedBatchNormGradV3_Training(%arg0: tensor<8x8x8x8xf32>, %arg1: tensor<8x8x8x8xf32>, %arg2: tensor<8xf32>, %arg3: tensor<8xf32>, %arg4: tensor<8xf32>, %arg5: tensor<8xf32>) -> (tensor<8x8x8x8xf32>) { + // CHECK-NEXT: %[[grad:.*]] = "xla_hlo.convert"(%arg0) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[act:.*]] = "xla_hlo.convert"(%arg1) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[training:.*]] = "xla_hlo.batch_norm_grad"(%[[act]], %arg2, %arg3, %arg4, %[[grad]]) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8x8x8x8xf32>) -> tuple, tensor<8xf32>, tensor<8xf32>> + // CHECK-NEXT: %[[tact:.*]] = "xla_hlo.get_tuple_element"(%[[training]]) {index = 0 : i32} : (tuple, tensor<8xf32>, tensor<8xf32>>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[scale_backprop:.*]] = "xla_hlo.get_tuple_element"(%[[training]]) {index = 1 : i32} : (tuple, tensor<8xf32>, tensor<8xf32>>) -> tensor<8xf32> + // CHECK-NEXT: %[[offset_backprop:.*]] = "xla_hlo.get_tuple_element"(%[[training]]) {index = 2 : i32} : (tuple, tensor<8xf32>, tensor<8xf32>>) -> tensor<8xf32> + // CHECK-NEXT: %[[x_backprop:.*]] = "xla_hlo.convert"(%[[tact]]) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: return %[[x_backprop]] : tensor<8x8x8x8xf32> + + %0:5 = "tf.FusedBatchNormGradV3"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5) {T = "tfdtype$DT_FLOAT", data_format = "NHWC", epsilon = 0.001 : f32, is_training = true} : (tensor<8x8x8x8xf32>, tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) -> (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) + return %0#0 : tensor<8x8x8x8xf32> +} + +// CHECK-LABEL: fusedBatchNormGradV3_noTraining_mixed_precision +func @fusedBatchNormGradV3_noTraining_mixed_precision(%arg0: tensor<8x8x8x8xf32>, %arg1: tensor<8x8x8x8xbf16>, %arg2: tensor<8xf32>, %arg3: tensor<8xf32>, %arg4: tensor<8xf32>, %arg5: tensor<8xf32>) -> (tensor<8x8x8x8xbf16>) { + // CHECK-NEXT: %[[grad:.*]] = "xla_hlo.convert"(%arg0) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[act:.*]] = "xla_hlo.convert"(%arg1) : 
(tensor<8x8x8x8xbf16>) -> tensor<8x8x8x8xf32> + + // CHECK: %[[x_backprop:.*]] = "xla_hlo.convert"({{.*}}) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xbf16> + // CHECK-NEXT: return %[[x_backprop]] : tensor<8x8x8x8xbf16> + + %0:5 = "tf.FusedBatchNormGradV3"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5) {T = "tfdtype$DT_FLOAT", data_format = "NHWC", epsilon = 0.001 : f32, is_training = false} : (tensor<8x8x8x8xf32>, tensor<8x8x8x8xbf16>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) -> (tensor<8x8x8x8xbf16>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) + return %0#0 : tensor<8x8x8x8xbf16> +} + +// CHECK-LABEL: fusedBatchNormGradV3_Training_mixed_precision +func @fusedBatchNormGradV3_Training_mixed_precision(%arg0: tensor<8x8x8x8xf32>, %arg1: tensor<8x8x8x8xbf16>, %arg2: tensor<8xf32>, %arg3: tensor<8xf32>, %arg4: tensor<8xf32>, %arg5: tensor<8xf32>) -> (tensor<8x8x8x8xbf16>) { + // CHECK-NEXT: %[[grad:.*]] = "xla_hlo.convert"(%arg0) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[act:.*]] = "xla_hlo.convert"(%arg1) : (tensor<8x8x8x8xbf16>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[training:.*]] = "xla_hlo.batch_norm_grad"(%[[act]], %arg2, %arg3, %arg4, %[[grad]]) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8x8x8x8xf32>) -> tuple, tensor<8xf32>, tensor<8xf32>> + // CHECK-NEXT: %[[tact:.*]] = "xla_hlo.get_tuple_element"(%[[training]]) {index = 0 : i32} : (tuple, tensor<8xf32>, tensor<8xf32>>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[scale_backprop:.*]] = "xla_hlo.get_tuple_element"(%[[training]]) {index = 1 : i32} : (tuple, tensor<8xf32>, tensor<8xf32>>) -> tensor<8xf32> + // CHECK-NEXT: %[[offset_backprop:.*]] = "xla_hlo.get_tuple_element"(%[[training]]) {index = 2 : i32} : (tuple, tensor<8xf32>, tensor<8xf32>>) -> tensor<8xf32> + // CHECK-NEXT: %[[x_backprop:.*]] = "xla_hlo.convert"(%[[tact]]) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xbf16> + // CHECK-NEXT: return %[[x_backprop]] : tensor<8x8x8x8xbf16> + + %0:5 = "tf.FusedBatchNormGradV3"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5) {T = "tfdtype$DT_FLOAT", data_format = "NHWC", epsilon = 0.001 : f32, is_training = true} : (tensor<8x8x8x8xf32>, tensor<8x8x8x8xbf16>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) -> (tensor<8x8x8x8xbf16>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) + return %0#0 : tensor<8x8x8x8xbf16> +} + +// CHECK-LABEL: fusedBatchNormGradV3_noTraining_NCHW +func @fusedBatchNormGradV3_noTraining_NCHW(%arg0: tensor<8x8x8x8xf32>, %arg1: tensor<8x8x8x8xf32>, %arg2: tensor<8xf32>, %arg3: tensor<8xf32>, %arg4: tensor<8xf32>, %arg5: tensor<8xf32>) -> (tensor<8x8x8x8xf32>) { + // CHECK-NEXT: %[[grad:.*]] = "xla_hlo.convert"(%arg0) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[act:.*]] = "xla_hlo.convert"(%arg1) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[eps:.*]] = xla_hlo.constant dense<1.000000e-03> : tensor + + // CHECK-NEXT: %[[add:.*]] = "xla_hlo.add"(%arg4, %[[eps]]) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<8xf32>, tensor) -> tensor<8xf32> + // CHECK-NEXT: %[[scr1:.*]] = "xla_hlo.rsqrt"(%[[add]]) : (tensor<8xf32>) -> tensor<8xf32> + + // CHECK-NEXT: %[[sub:.*]] = "xla_hlo.sub"(%[[act]], %arg3) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<8x8x8x8xf32>, tensor<8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[mul:.*]] = xla_hlo.mul %[[grad]], %[[sub]] {broadcast_dimensions = dense<[]> : 
tensor<0xi64>} : tensor<8x8x8x8xf32> + // CHECK-NEXT: xla_hlo.constant dense<[0, 2, 3]> : tensor<3xi64> + // CHECK-NEXT: %[[cmul:.*]] = "xla_hlo.convert"(%[[mul]]) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[init:.*]] = xla_hlo.constant dense<0.000000e+00> : tensor + // CHECK-NEXT: %[[red1:.*]] = "xla_hlo.reduce"(%[[cmul]], %[[init]]) ( { + // CHECK-NEXT: ^bb0(%arg6: tensor, %arg7: tensor): // no predecessors + // CHECK-NEXT: %[[reduced:.*]] = xla_hlo.add %arg6, %arg7 : tensor + // CHECK-NEXT: "xla_hlo.return"(%[[reduced]]) : (tensor) -> () + // CHECK-NEXT: }) {dimensions = dense<[0, 2, 3]> : tensor<3xi64>} : (tensor<8x8x8x8xf32>, tensor) -> tensor<8xf32> + // CHECK-NEXT: %[[scr2:.*]] = "xla_hlo.convert"(%[[red1]]) : (tensor<8xf32>) -> tensor<8xf32> + + // CHECK-NEXT: %[[mul2:.*]] = xla_hlo.mul %arg2, %[[scr1]] {broadcast_dimensions = dense<[]> : tensor<0xi64>} : tensor<8xf32> + // CHECK-NEXT: %[[mul3:.*]] = "xla_hlo.mul"(%[[grad]], %[[mul2]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<8x8x8x8xf32>, tensor<8xf32>) -> tensor<8x8x8x8xf32> + + // CHECK-NEXT: %[[scale_backprop:.*]] = xla_hlo.mul %[[scr1]], %[[scr2]] {broadcast_dimensions = dense<[]> : tensor<0xi64>} : tensor<8xf32> + + // CHECK-NEXT: xla_hlo.constant dense<[0, 2, 3]> : tensor<3xi64> + // CHECK-NEXT: %[[cgrad:.*]] = "xla_hlo.convert"(%[[grad]]) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: %[[init2:.*]] = xla_hlo.constant dense<0.000000e+00> : tensor + // CHECK-NEXT: %[[red2:.*]] = "xla_hlo.reduce"(%[[cgrad]], %[[init2]]) ( { + // CHECK-NEXT: ^bb0(%arg6: tensor, %arg7: tensor): // no predecessors + // CHECK-NEXT: %[[reduced1:.*]] = xla_hlo.add %arg6, %arg7 : tensor + // CHECK-NEXT: "xla_hlo.return"(%[[reduced1]]) : (tensor) -> () + // CHECK-NEXT: }) {dimensions = dense<[0, 2, 3]> : tensor<3xi64>} : (tensor<8x8x8x8xf32>, tensor) -> tensor<8xf32> + // CHECK-NEXT: %[[offset_backprop:.*]] = "xla_hlo.convert"(%[[red2]]) : (tensor<8xf32>) -> tensor<8xf32> + + // CHECK-NEXT: %[[x_backprop:.*]] = "xla_hlo.convert"(%[[mul3]]) : (tensor<8x8x8x8xf32>) -> tensor<8x8x8x8xf32> + // CHECK-NEXT: return %[[x_backprop]] : tensor<8x8x8x8xf32> + + %0:5 = "tf.FusedBatchNormGradV3"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5) {T = "tfdtype$DT_FLOAT", data_format = "NCHW", epsilon = 0.001 : f32, is_training = false} : (tensor<8x8x8x8xf32>, tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) -> (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) + return %0#0 : tensor<8x8x8x8xf32> +} + +// CHECK-LABEL: fusedBatchNormGradV3_Training_NCHW +func @fusedBatchNormGradV3_Training_NCHW(%arg0: tensor<8x8x8x8xf32>, %arg1: tensor<8x8x8x8xf32>, %arg2: tensor<8xf32>, %arg3: tensor<8xf32>, %arg4: tensor<8xf32>, %arg5: tensor<8xf32>) -> (tensor<8x8x8x8xf32>) { + // CHECK: %{{.*}} = "xla_hlo.batch_norm_grad"(%{{.*}}, %arg2, %arg3, %arg4, %[[grad]]) {epsilon = 1.000000e-03 : f32, feature_index = 1 : i64} : (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8x8x8x8xf32>) -> tuple, tensor<8xf32>, tensor<8xf32>> + %0:5 = "tf.FusedBatchNormGradV3"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5) {T = "tfdtype$DT_FLOAT", data_format = "NCHW", epsilon = 0.001 : f32, is_training = true} : (tensor<8x8x8x8xf32>, tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) -> (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) + return %0#0 : tensor<8x8x8x8xf32> +} + 
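The NHWC batch-norm tests above all expect feature_index = 3, while the NCHW variants expect feature_index = 1: the legalization only has to map the data_format attribute to the channel dimension of the 4-D input. A minimal sketch of that mapping, assuming NHWC and NCHW are the only layouts handled (the helper name and signature are hypothetical):

    #include <cstdint>
    #include "llvm/ADT/StringRef.h"

    // Channel dimension used as feature_index for xla_hlo.batch_norm_* ops:
    // NHWC stores channels last (rank - 1, i.e. 3 for a 4-D tensor), while
    // NCHW stores them at dimension 1.
    static int64_t GetFeatureDimension(llvm::StringRef data_format,
                                       int64_t rank) {
      return data_format == "NCHW" ? 1 : rank - 1;
    }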
//===----------------------------------------------------------------------===// // Bias op legalizations. //===----------------------------------------------------------------------===// @@ -87,6 +408,27 @@ func @broadcast_div(%arg0: tensor<1xi32>, %arg1: tensor<1x2xi32>) -> tensor<1x2x return %0: tensor<1x2xi32> } +// CHECK-LABEL: func @shift_left +func @shift_left(%arg0: tensor<4xi32>, %arg1: tensor<4xi32>) -> tensor<4xi32> { + // CHECK: xla_hlo.shift_left %arg0, %arg1 : tensor<4xi32> + %0 = "tf.LeftShift"(%arg0, %arg1) : (tensor<4xi32>, tensor<4xi32>) -> tensor<4xi32> + return %0 : tensor<4xi32> +} + +// CHECK-LABEL: func @div_dynamic +func @div_dynamic(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "xla_hlo.div"(%arg0, %arg1) {broadcast_dimensions = dense<1> : tensor<1xi64>} + %0 = "tf.Div"(%arg0, %arg1) : (tensor, tensor) -> tensor + return %0: tensor +} + +// CHECK-LABEL: func @div_unranked +func @div_unranked(%arg0: tensor<*xi32>, %arg1: tensor) -> tensor { + // CHECK: tf.Div + %0 = "tf.Div"(%arg0, %arg1) : (tensor<*xi32>, tensor) -> tensor + return %0: tensor +} + // CHECK-LABEL: func @maximum func @maximum(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> { // CHECK: xla_hlo.max %arg0, %arg1 : tensor<4xf32> @@ -145,6 +487,34 @@ func @broadcast_sub(%arg0: tensor<1xi32>, %arg1: tensor<1x2xi32>) -> tensor<1x2x return %0: tensor<1x2xi32> } +// CHECK-LABEL: func @shift_right +func @shift_right(%arg0: tensor<4xi32>, %arg1: tensor<4xi32>) -> tensor<4xi32> { + // CHECK: xla_hlo.shift_right_arithmetic %arg0, %arg1 : tensor<4xi32> + %0 = "tf.RightShift"(%arg0, %arg1) : (tensor<4xi32>, tensor<4xi32>) -> tensor<4xi32> + return %0 : tensor<4xi32> +} + +// CHECK-LABEL: func @broadcast_shift_right +func @broadcast_shift_right(%arg0: tensor<4xi32>, %arg1: tensor<2x4xi32>) -> tensor<2x4xi32> { + // CHECK: "xla_hlo.shift_right_arithmetic"(%arg0, %arg1) {broadcast_dimensions = dense<1> : tensor<1xi64>} + %0 = "tf.RightShift"(%arg0, %arg1) : (tensor<4xi32>, tensor<2x4xi32>) -> tensor<2x4xi32> + return %0 : tensor<2x4xi32> +} + +// CHECK-LABEL: func @shift_right_unsigned +func @shift_right_unsigned(%arg0: tensor<4x!tf.uint8>, %arg1: tensor<4x!tf.uint8>) -> tensor<4x!tf.uint8> { + // CHECK: tf.RightShift + %0 = "tf.RightShift"(%arg0, %arg1) : (tensor<4x!tf.uint8>, tensor<4x!tf.uint8>) -> tensor<4x!tf.uint8> + return %0 : tensor<4x!tf.uint8> +} + +// CHECK-LABEL: func @broadcast_shift_right_unsigned +func @broadcast_shift_right_unsigned(%arg0: tensor<4x!tf.uint8>, %arg1: tensor<2x4x!tf.uint8>) -> tensor<2x4x!tf.uint8> { + // CHECK: tf.RightShift + %0 = "tf.RightShift"(%arg0, %arg1) : (tensor<4x!tf.uint8>, tensor<2x4x!tf.uint8>) -> tensor<2x4x!tf.uint8> + return %0 : tensor<2x4x!tf.uint8> +} + // CHECK-LABEL: func @and func @and(%arg0: tensor<2xi1>) -> tensor<2xi1> { // CHECK-NEXT: xla_hlo.and @@ -166,6 +536,13 @@ func @and_dynamic(%arg0: tensor, %arg1: tensor<1xi1>) -> tensor { return %0: tensor } +// CHECK-LABEL: func @and_unranked +func @and_unranked(%arg0: tensor<*xi1>, %arg1: tensor<*xi1>) -> tensor<*xi1> { + // CHECK: tf.LogicalAnd + %0 = "tf.LogicalAnd"(%arg0, %arg1) : (tensor<*xi1>, tensor<*xi1>) -> tensor<*xi1> + return %0: tensor<*xi1> +} + // CHECK-LABEL: func @or func @or(%arg0: tensor<2xi1>) -> tensor<2xi1> { // CHECK-NEXT: xla_hlo.or @@ -310,6 +687,20 @@ func @floordiv_f16_broadcast(%arg0: tensor<2x3xf16>, %arg1: tensor<3xf16>) -> te return %0: tensor<2x3xf16> } +// CHECK-LABEL: func @floordiv_dynamic +func @floordiv_dynamic(%arg0: tensor, %arg1: tensor) -> tensor { + // 
CHECK: tf.FloorDiv + %0 = "tf.FloorDiv"(%arg0, %arg1) : (tensor, tensor) -> tensor + return %0: tensor +} + +// CHECK-LABEL: func @floordiv_unranked +func @floordiv_unranked(%arg0: tensor<*xi32>, %arg1: tensor<*xi32>) -> tensor<*xi32> { + // CHECK: tf.FloorDiv + %0 = "tf.FloorDiv"(%arg0, %arg1) : (tensor<*xi32>, tensor<*xi32>) -> tensor<*xi32> + return %0: tensor<*xi32> +} + // CHECK-LABEL: func @floormod_broadcast_numerator func @floormod_broadcast_numerator(%arg0: tensor<3xi32>, %arg1: tensor<2x3xi32>) -> tensor<2x3xi32> { // CHECK-DAG: [[REM:%.+]] = "xla_hlo.remainder"(%arg0, %arg1) {broadcast_dimensions = dense<1> : tensor<1xi64>} @@ -344,6 +735,20 @@ func @floormod_broadcast_denominator(%arg0: tensor<2x3xi32>, %arg1: tensor<3xi32 return %0: tensor<2x3xi32> } +// CHECK-LABEL: func @floormod_dynamic +func @floormod_dynamic(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: tf.FloorMod + %0 = "tf.FloorMod"(%arg0, %arg1) : (tensor, tensor) -> tensor + return %0: tensor +} + +// CHECK-LABEL: func @floormod_unranked +func @floormod_unranked(%arg0: tensor<*xi32>, %arg1: tensor<*xi32>) -> tensor<*xi32> { + // CHECK: tf.FloorMod + %0 = "tf.FloorMod"(%arg0, %arg1) : (tensor<*xi32>, tensor<*xi32>) -> tensor<*xi32> + return %0: tensor<*xi32> +} + // CHECK-LABEL: func @broadcast_to func @broadcast_to(%arg0: tensor<16xf32>) -> tensor<16x16x16x16xf32> { %cst = "tf.Const"() { value = dense<16> : tensor<4xi32> } : () -> tensor<4xi32> @@ -415,6 +820,13 @@ func @equal_incompatible_shape_both_dynamic(%arg0: tensor, %arg1: tensor< return %0: tensor<*xi1> } +// CHECK-LABEL: func @equal_unranked +func @equal_unranked(%arg0: tensor<*xi32>, %arg1: tensor<*xi32>) -> tensor<*xi1> { + // CHECK: "tf.Equal" + %0 = "tf.Equal"(%arg0, %arg1) { incompatible_shape_error = false } : (tensor<*xi32>, tensor<*xi32>) -> tensor<*xi1> + return %0: tensor<*xi1> +} + // CHECK-LABEL: func @notequal func @notequal(%arg0: tensor<2xi32>) -> tensor<2xi1> { // CHECK-NEXT: "xla_hlo.compare"(%arg0, %arg0) {comparison_direction = "NE"} @@ -482,6 +894,20 @@ func @broadcast_greater(%arg0: tensor<1xi32>, %arg1: tensor<1x2xi32>) -> tensor< return %0: tensor<1x2xi1> } +// CHECK-LABEL: func @greater_dynamic +func @greater_dynamic(%arg0: tensor) -> tensor { + // CHECK: "xla_hlo.compare"(%arg0, %arg0) {comparison_direction = "GT"} + %0 = "tf.Greater"(%arg0, %arg0) : (tensor, tensor) -> tensor + return %0: tensor +} + +// CHECK-LABEL: func @greater_uranked +func @greater_uranked(%arg0: tensor<*xi32>) -> tensor<*xi1> { + // CHECK: "tf.Greater" + %0 = "tf.Greater"(%arg0, %arg0) : (tensor<*xi32>, tensor<*xi32>) -> tensor<*xi1> + return %0: tensor<*xi1> +} + // CHECK-LABEL: func @greater_equal func @greater_equal(%arg0: tensor<2xi32>) -> tensor<2xi1> { // CHECK-NEXT: "xla_hlo.compare"(%arg0, %arg0) {comparison_direction = "GE"} @@ -761,13 +1187,22 @@ func @maxpool_valid_padding(%arg0: tensor<2x12x20x7xi32>) -> tensor<2x3x5x7xi32> return %0 : tensor<2x3x5x7xi32> } +// CHECK-LABEL: maxpool_same_padding +// CHECK-SAME: %[[ARG:.*]]: tensor +func @maxpool_same_padding(%arg0: tensor<2x13x25x7xi32>) -> tensor<2x4x7x7xi32> { + // CHECK: padding = dense<{{\[\[}}0, 0, 1, 0], [0, 1, 1, 0]]> : tensor<2x4xi64> + + %0 = "tf.MaxPool"(%arg0) {data_format = "NHWC", ksize = [1, 2, 3, 1], padding = "SAME", strides = [1, 4, 4, 1]} : (tensor<2x13x25x7xi32>) -> tensor<2x4x7x7xi32> + return %0 : tensor<2x4x7x7xi32> +} + //===----------------------------------------------------------------------===// // MaxPoolGrad op legalizations. 
//===----------------------------------------------------------------------===// -// CHECK-LABEL: @max_pool_grad +// CHECK-LABEL: @max_pool_grad_valid // CHECK-SAME: %[[INPUT:.*]]: tensor<10x24x24x64xf32>, %arg1: tensor<10x12x12x64xf32>, %[[GRAD:.*]]: tensor<10x12x12x64xf32> -func @max_pool_grad(%orig_input: tensor<10x24x24x64xf32>, %orig_output: tensor<10x12x12x64xf32>, %grad: tensor<10x12x12x64xf32>) -> tensor<10x24x24x64xf32> { +func @max_pool_grad_valid(%orig_input: tensor<10x24x24x64xf32>, %orig_output: tensor<10x12x12x64xf32>, %grad: tensor<10x12x12x64xf32>) -> tensor<10x24x24x64xf32> { // CHECK: %[[ZERO:.*]] = xla_hlo.constant dense<0.000000e+00> : tensor // CHECK: %[[RESULT:.*]] = "xla_hlo.select_and_scatter"(%[[INPUT]], %[[GRAD]], %[[ZERO]]) ( { // CHECK: ^bb0(%[[VALUE_A:.*]]: tensor, %[[VALUE_B:.*]]: tensor): @@ -789,6 +1224,18 @@ func @max_pool_grad(%orig_input: tensor<10x24x24x64xf32>, %orig_output: tensor<1 return %result : tensor<10x24x24x64xf32> } +// CHECK-LABEL: @max_pool_grad_same +func @max_pool_grad_same(%orig_input: tensor<2x13x25x7xf32>, %orig_output: tensor<2x4x7x7xf32>, %grad: tensor<2x4x7x7xf32>) -> tensor<2x13x25x7xf32> { + // CHECK: padding = dense<{{\[\[}}0, 0, 1, 0], [0, 1, 1, 0]]> : tensor<2x4xi64> + %result = "tf.MaxPoolGrad"(%orig_input, %orig_output, %grad) { + data_format = "NHWC", + ksize = [1, 2, 3, 1], + padding = "SAME", + strides = [1, 4, 4, 1] + } : (tensor<2x13x25x7xf32>, tensor<2x4x7x7xf32>, tensor<2x4x7x7xf32>) -> tensor<2x13x25x7xf32> + return %result : tensor<2x13x25x7xf32> +} + //===----------------------------------------------------------------------===// // OneHot op legalizations. //===----------------------------------------------------------------------===// @@ -1243,6 +1690,34 @@ func @log_unranked(%arg0: tensor<*xf32>) -> tensor<*xf32> { return %0 : tensor<*xf32> } +// CHECK-LABEL: @log1p +func @log1p(%arg0: tensor<2xf32>) -> tensor<2xf32> { + // CHECK: "xla_hlo.log_plus_one"(%arg0) : (tensor<2xf32>) -> tensor<2xf32> + %0 = "tf.Log1p"(%arg0) : (tensor<2xf32>) -> tensor<2xf32> + return %0 : tensor<2xf32> +} + +// CHECK-LABEL: func @log1p_dynamic +func @log1p_dynamic(%arg0: tensor) -> tensor { + // CHECK: "xla_hlo.log_plus_one"(%arg0) : (tensor) -> tensor + %0 = "tf.Log1p"(%arg0) : (tensor) -> tensor + return %0 : tensor +} + +// CHECK-LABEL: func @log1p_unranked +func @log1p_unranked(%arg0: tensor<*xf32>) -> tensor<*xf32> { + // CHECK: "xla_hlo.log_plus_one"(%arg0) : (tensor<*xf32>) -> tensor<*xf32> + %0 = "tf.Log1p"(%arg0) : (tensor<*xf32>) -> tensor<*xf32> + return %0 : tensor<*xf32> +} + +// CHECK-LABEL: func @not_op_unranked +func @not_op_unranked(%arg0: tensor<*xi1>) -> tensor<*xi1> { + // CHECK: "xla_hlo.not"(%arg0) : (tensor<*xi1>) -> tensor<*xi1> + %0 = "tf.LogicalNot"(%arg0) : (tensor<*xi1>) -> tensor<*xi1> + return %0 : tensor<*xi1> +} + // CHECK-LABEL: @neg func @neg(%arg0: tensor<2xf32>) -> tensor<2xf32> { // CHECK: "xla_hlo.neg"(%arg0) : (tensor<2xf32>) -> tensor<2xf32> @@ -1404,6 +1879,18 @@ func @expand_dims(%arg0: tensor<2xf32>, %axis: tensor) -> tensor<1x2xf32> { return %0 : tensor<1x2xf32> } +// CHECK-LABEL: func @sign +// CHECK-SAME: [[ARG:%arg.*]]: tensor<1x2x3x4xf32> +func @sign(%arg0: tensor<1x2x3x4xf32>) -> tensor<1x2x3x4xf32> { + // CHECK: [[PRED:%.*]] = "xla_hlo.compare"([[ARG]], [[ARG]]) + // CHECK: [[ZEROS:%.*]] = xla_hlo.constant dense<0.000000e+00> : tensor<1x2x3x4xf32> + // CHECK: [[SIGN:%.*]] = "xla_hlo.sign"([[ARG]]) + // CHECK: [[SELECT:%.*]] = "xla_hlo.select"([[PRED]], [[ZEROS]], [[SIGN]]) + // CHECK: 
return [[SELECT]] : tensor<1x2x3x4xf32> + %0 = "tf.Sign"(%arg0) : (tensor<1x2x3x4xf32>) -> (tensor<1x2x3x4xf32>) + return %0 : tensor<1x2x3x4xf32> +} + // CHECK-LABEL: slice_constant_start func @slice_constant_start(%arg0: tensor<4xi32>) -> tensor<2xi32> { // CHECK: %[[START:.*]] = xla_hlo.constant dense<1> : tensor<1xi64> @@ -1525,23 +2012,45 @@ func @strided_slice_range_clamping(%input: tensor<4x8xf32>) -> tensor<0x3xf32> { return %output : tensor<0x3xf32> } -// CHECK-LABEL: strided_slice_shrink_axis -func @strided_slice_shrink_axis(%input: tensor<4x8xf32>) -> tensor { - %begin = "tf.Const"() {value = dense<[1, 3]> : tensor<2xi32>} : () -> (tensor<2xi32>) - %end = "tf.Const"() {value = dense<[2, 4]> : tensor<2xi32>} : () -> (tensor<2xi32>) - %strides = "tf.Const"() {value = dense<[1, 3]> : tensor<2xi32>} : () -> (tensor<2xi32>) +// CHECK-LABEL: strided_slice_begin_end_mask +// CHECK-SAME: %[[INPUT:[a-z0-9]+]]: tensor<4x128x1024xf32> +func @strided_slice_begin_end_mask(%input: tensor<4x128x1024xf32>) { - // CHECK: %[[SLICED:.*]] = "xla_hlo.slice" - // CHECK-DAG-SAME: start_indices = dense<[1, 3]> - // CHECK-DAG-SAME: limit_indices = dense<[2, 4]> - // CHECK-DAG-SAME: strides = dense<[1, 3]> - // CHECK-SAME: -> tensor<1x1xf32> + // For StridedSlice + // Dim #: 0, 1, 2 + // Input shape: [4, 128, 1024] + // Begin: 1, 4, -3 + // End: 8, 65, 42 + // Stride: 1, 4, -1 + // Begin mask: 1, 0, 0 (= 1) + // End mask: 0, 0, 1 (= 4) - // CHECK: "xla_hlo.reshape"(%[[SLICED]]) : (tensor<1x1xf32>) -> tensor + // So result shape: + // Dim #0: begin mask (1) -> begin = 0; end 8 canonicalized to 4: so 4 + // Dim #1: 4 to 65 stride 4: so 16 + // Dim #2: begin -3 + 1024 = 1021; end mask (1) -> end = -1: so 1022 + // result shape: [4, 16, 1022] - %output = "tf.StridedSlice"(%input, %begin, %end, %strides) {shrink_axis_mask = 3 - : i64} : (tensor<4x8xf32>, tensor<2xi32>, tensor<2xi32>, tensor<2xi32>) -> tensor - return %output : tensor + // As output shape of StridedSlice differs, a reshape will follow. 
+ + %begin = "tf.Const"() {value = dense<[1, 4, -3]> : tensor<3xi32>} : () -> (tensor<3xi32>) + %end = "tf.Const"() {value = dense<[8, 65, 42]> : tensor<3xi32>} : () -> (tensor<3xi32>) + %strides = "tf.Const"() {value = dense<[1, 4, -1]> : tensor<3xi32>} : () -> (tensor<3xi32>) + + // CHECK: %[[REVERSE:.*]] = "xla_hlo.reverse"(%[[INPUT]]) + + // CHECK: %[[SLICE:.*]] = "xla_hlo.slice"(%[[REVERSE]]) + // CHECK-DAG-SAME: limit_indices = dense<[4, 65, 1024]> + // CHECK-DAG-SAME: start_indices = dense<[0, 4, 2]> + // CHECK-DAG-SAME: strides = dense<[1, 4, 1]> + // CHECK-SAME: -> tensor<4x16x1022xf32> + + %0 = "tf.StridedSlice"(%input, %begin, %end, %strides) {begin_mask = 1, end_mask = 4} : (tensor<4x128x1024xf32>, tensor<3xi32>, tensor<3xi32>, tensor<3xi32>) -> tensor + + // CHECK: "xla_hlo.reshape"(%[[SLICE]]) + // CHECK-SAME: -> tensor + + return } //===----------------------------------------------------------------------===// @@ -2268,7 +2777,7 @@ func @gather_v2_unranked(%arg0: tensor<*xf32>, %arg1: tensor<*xi32>) -> tensor<* func @strided_slice_grad(%grad: tensor<4x16x1022xf32>) -> tensor<4x128x1024xf32> { // For StridedSlice - // Dim #: 0, 1, 2 + // Dim #: 0, 1, 2 // Input shape: [4, 128, 1024] // Begin: 1, 4, -3 // End: 8, 65, 42 @@ -2277,7 +2786,7 @@ func @strided_slice_grad(%grad: tensor<4x16x1022xf32>) -> tensor<4x128x1024xf32> // End mask: 0, 0, 1 (= 4) // So result shape: - // Dim #0: begin mask (1) -> begin = 0; end 8 cannonicalized to 4: so 4 + // Dim #0: begin mask (1) -> begin = 0; end 8 canonicalized to 4: so 4 // Dim #1: 4 to 65 stride 4: so 16 // Dim #2: begin -3 + 1024 = 1021; end mask (1) -> end = -1: so 1022 // result shape: [4, 16, 1022] @@ -2302,3 +2811,20 @@ func @strided_slice_grad(%grad: tensor<4x16x1022xf32>) -> tensor<4x128x1024xf32> // CHECK: return [[PAD]] return %0: tensor<4x128x1024xf32> } + +// CHECK-LABEL: @tensor_scatter_update +func @tensor_scatter_update(%tensor: tensor, %indices: tensor, %updates: tensor) -> tensor { + // CHECK: "xla_hlo.scatter"(%arg0, %arg1, %arg2) ( { + // CHECK: ^bb0(%arg3: tensor, %arg4: tensor): + // CHECK: "xla_hlo.return"(%arg4) : (tensor) -> () + // CHECK: }) + // CHECK-SAME: indices_are_sorted = false + // CHECK-SAME: scatter_dimension_numbers + // CHECK-SAME: index_vector_dim = 1 : i64 + // CHECK-SAME: inserted_window_dims = dense<[0, 1]> : tensor<2xi64> + // CHECK-SAME: scatter_dims_to_operand_dims = dense<[0, 1]> : tensor<2xi64> + // CHECK-SAME: update_window_dims = dense<1> : tensor<1xi64> + // CHECK-SAME: unique_indices = false + %0 = "tf.TensorScatterUpdate"(%tensor, %indices, %updates) : (tensor, tensor, tensor) -> tensor + return %0 : tensor +} diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-to-std.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-to-std.mlir index dae20d0f469..1d2cf767939 100644 --- a/tensorflow/compiler/mlir/xla/tests/legalize-to-std.mlir +++ b/tensorflow/compiler/mlir/xla/tests/legalize-to-std.mlir @@ -32,10 +32,10 @@ func @binary_ops_int(%arg0: tensor<4xi32>, %arg1: tensor<4xi32>) -> tensor<4xi32 // CHECK-NEXT: %2 = subi %1, %arg1 : tensor<4xi32> %2 = "xla_hlo.sub"(%1, %arg1) {name = "sub.5"} : (tensor<4xi32>, tensor<4xi32>) -> tensor<4xi32> - // CHECK-NEXT: %3 = divis %2, %arg1 : tensor<4xi32> + // CHECK-NEXT: %3 = divi_signed %2, %arg1 : tensor<4xi32> %3 = "xla_hlo.div"(%2, %arg1) {name = "div.6"} : (tensor<4xi32>, tensor<4xi32>) -> tensor<4xi32> - // CHECK-NEXT: %4 = remis %3, %arg1 : tensor<4xi32> + // CHECK-NEXT: %4 = remi_signed %3, %arg1 : tensor<4xi32> %4 = "xla_hlo.remainder"(%3, 
%arg1) : (tensor<4xi32>, tensor<4xi32>) -> tensor<4xi32> // CHECK-NEXT: return %4 : tensor<4xi32> diff --git a/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-to-affine.mlir b/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-to-affine.mlir index d4ee0fdc2e2..74fea0cc687 100644 --- a/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-to-affine.mlir +++ b/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-to-affine.mlir @@ -59,7 +59,7 @@ func @float_div_op(%lhs: memref<7xf32>, %rhs: memref<7xf32>, // CHECK-LABEL: func @int_div_op func @int_div_op(%lhs: memref<7xi32>, %rhs: memref<7xi32>, %result: memref<7xi32>) -> () { - // CHECK: divis %{{.*}}, %{{.*}} : i32 + // CHECK: divi_signed %{{.*}}, %{{.*}} : i32 "xla_lhlo.div"(%lhs, %rhs, %result) {name = "div.1"} : (memref<7xi32>, memref<7xi32>, memref<7xi32>) -> () return diff --git a/tensorflow/compiler/mlir/xla/tests/ops.mlir b/tensorflow/compiler/mlir/xla/tests/ops.mlir index a315a2318b5..c6db931e239 100644 --- a/tensorflow/compiler/mlir/xla/tests/ops.mlir +++ b/tensorflow/compiler/mlir/xla/tests/ops.mlir @@ -13,6 +13,45 @@ func @invalid_type() -> !xla_hlo.foobar // ----- +// CHECK-LABEL: func @alltoall +func @alltoall(%data: tensor<4x16xf32>) -> tensor<16x4xf32> { + %0 = "xla_hlo.all_to_all"(%data) { + split_dimension = 1 : i64, + concat_dimension = 0 : i64, + split_count = 4 : i64, + replica_groups = dense<[[0, 1, 2, 3]]> : tensor<1x4xi64> + } : (tensor<4x16xf32>) -> tensor<16x4xf32> + return %0 : tensor<16x4xf32> +} + +// ----- + +// CHECK-LABEL: func @alltoall_unranked_input +func @alltoall_unranked_input(%data: tensor<*xf32>) -> tensor<*xf32> { + %0 = "xla_hlo.all_to_all"(%data) { + split_dimension = 1 : i64, + concat_dimension = 0 : i64, + split_count = 5 : i64, + replica_groups = dense<[[0, 1, 2, 3, 4]]> : tensor<1x5xi64> + } : (tensor<*xf32>) -> tensor<*xf32> + return %0 : tensor<*xf32> +} + +// ----- + +func @alltoall_invalid_split_dim_size(%data: tensor<4x16xf32>) -> tensor<16x4xf32> { +// expected-error@+1 {{split dimension has size 16, expected to be a multiple of split_count 5}} + %0 = "xla_hlo.all_to_all"(%data) { + split_dimension = 1 : i64, + concat_dimension = 0 : i64, + split_count = 5 : i64, + replica_groups = dense<[[0, 1, 2, 3, 4]]> : tensor<1x5xi64> + } : (tensor<4x16xf32>) -> tensor<16x4xf32> + return %0 : tensor<16x4xf32> +} + +// ----- + // CHECK-LABEL: func @broadcast func @broadcast(%arg0: tensor<3xi32>) -> tensor<1x2x3xi32> { %0 = "xla_hlo.broadcast"(%arg0) {broadcast_sizes = dense<[1, 2]> : tensor<2xi64>} : (tensor<3xi32>) -> tensor<1x2x3xi32> @@ -189,6 +228,15 @@ func @dot_bad_precision_config(%arg0: tensor<2x2xi32>, %arg1: tensor<2x2xi32>) - // ----- +func @rng_uniform_invalid_type(%mu: tensor>, %sigma: tensor) -> tensor<2x3x5xf32> { + %shape = xla_hlo.constant dense<[2, 3, 5]> : tensor<3xi64> + // expected-error@+1 {{must be tensor of pred (AKA boolean or 1-bit integer) or 8/16/32/64-bit integer or floating-point values, but got 'tensor>'}} + %0 = "xla_hlo.rng_uniform"(%mu, %sigma, %shape) : (tensor>, tensor, tensor<3xi64>) -> tensor<2x3x5xf32> + return %0 : tensor<2x3x5xf32> +} + +// ----- + // CHECK-LABEL: func @select func @select(%arg0: tensor<2x3xi1>, %arg1: tensor<2x3xi32>, %arg2: tensor<2x3xi32>) -> tensor<2x3xi32> { %0 = "xla_hlo.select"(%arg0, %arg1, %arg2) : (tensor<2x3xi1>, tensor<2x3xi32>, tensor<2x3xi32>) -> tensor<2x3xi32> diff --git a/tensorflow/compiler/mlir/xla/tests/translate/BUILD b/tensorflow/compiler/mlir/xla/tests/translate/BUILD index 857ee2896a2..c4e747c90f3 100644 --- 
a/tensorflow/compiler/mlir/xla/tests/translate/BUILD +++ b/tensorflow/compiler/mlir/xla/tests/translate/BUILD @@ -4,7 +4,7 @@ package(licenses = ["notice"]) glob_lit_tests( data = [":test_utilities"], - driver = "@local_config_mlir//:run_lit.sh", + driver = "@llvm-project//mlir:run_lit.sh", test_file_exts = [ "mlir", "hlo", @@ -18,7 +18,7 @@ filegroup( testonly = True, data = [ "//tensorflow/compiler/mlir:tf-mlir-translate", - "@llvm//:FileCheck", - "@llvm//:not", + "@llvm-project//llvm:FileCheck", + "@llvm-project//llvm:not", ], ) diff --git a/tensorflow/compiler/mlir/xla/tests/translate/export.mlir b/tensorflow/compiler/mlir/xla/tests/translate/export.mlir index 442780a520c..125c958d6c3 100644 --- a/tensorflow/compiler/mlir/xla/tests/translate/export.mlir +++ b/tensorflow/compiler/mlir/xla/tests/translate/export.mlir @@ -355,6 +355,18 @@ func @main(%arg0: tensor<3x4xi32>, %arg1: tensor<4x5xi32>) -> tensor<3x5xi32> { // ----- +// CHECK: HloModule +func @main(%arg0: tensor<3x9xf32>) -> tensor<3x5xcomplex> { + %0 = "xla_hlo.fft"(%arg0) {fft_length = dense<9> : tensor<1xi64>, fft_type = "RFFT"} : (tensor<3x9xf32>) -> tensor<3x5xcomplex> + return %0 : tensor<3x5xcomplex> +} + +// CHECK: ENTRY +// CHECK: [[ARG:%.*]] = f32[3,9] parameter(0) +// CHECK: c64[3,5] fft(f32[3,9] [[ARG]]), fft_type=RFFT, fft_length={9} + +// ----- + // CHECK: HloModule func @main(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>) -> tensor<10x300xf32> { // CHECK: [[ARG0:%.*]] = f32[200,100,300] parameter(0) @@ -396,6 +408,18 @@ func @main(%arg0: tuple, tensor>) -> tensor { // ----- +// CHECK: HloModule +func @main(%arg0: !xla_hlo.token) -> tuple, tensor>, !xla_hlo.token> { + %0 = "xla_hlo.infeed"(%arg0) {infeed_config = "foobar"} : (!xla_hlo.token) -> tuple, tensor>, !xla_hlo.token> + return %0 : tuple, tensor>, !xla_hlo.token> +} + +// CHECK: ENTRY +// CHECK: [[ARG:%.*]] = token[] parameter(0) +// CHECK: ROOT %[[RESULT:.*]] = ((s32[3], pred[]), token[]) infeed(token[] [[ARG]]), infeed_config="foobar" + +// ----- + // CHECK: HloModule func @main() -> tensor<1x10xf32> { %result = "xla_hlo.iota"() { @@ -409,6 +433,19 @@ func @main() -> tensor<1x10xf32> { // ----- +// CHECK: HloModule +func @main(%data: tensor<3xi32>, %token: !xla_hlo.token) -> !xla_hlo.token { + %0 = "xla_hlo.outfeed"(%data, %token) {outfeed_config = "foobar"} : (tensor<3xi32>, !xla_hlo.token) -> !xla_hlo.token + return %0 : !xla_hlo.token +} + +// CHECK: ENTRY +// CHECK: [[DATA:%.*]] = s32[3] parameter(0) +// CHECK: [[TOKEN:%.*]] = token[] parameter(1) +// CHECK: ROOT %[[RESULT:.*]] = token[] outfeed(s32[3] [[DATA]], token[] [[TOKEN]]), outfeed_config="foobar" + +// ----- + // CHECK: HloModule func @main(%arg: tensor<4x6xf32>, %pad: tensor) -> tensor<13x19xf32> { %0 = "xla_hlo.pad"(%arg, %pad) {edge_padding_high = dense<[4,5]> : tensor<2xi64>, edge_padding_low = dense<[2,3]> : tensor<2xi64>, interior_padding = dense<1> : tensor<2xi64>} : (tensor<4x6xf32>, tensor) -> tensor<13x19xf32> @@ -504,6 +541,20 @@ func @main(%arg0 : tensor<10x11x12x13xf32>) -> tensor<10x11x12x13xf32> { // ----- +// CHECK: HloModule +func @main(%mu: tensor, %sigma: tensor) -> tensor<2x3x5xf32> { + %shape = xla_hlo.constant dense<[2, 3, 5]> : tensor<3xi64> + %0 = "xla_hlo.rng_normal"(%mu, %sigma, %shape) : (tensor, tensor, tensor<3xi64>) -> tensor<2x3x5xf32> + return %0 : tensor<2x3x5xf32> +} + +// CHECK: ENTRY +// CHECK: %[[MU:.*]] = f32[] parameter(0) +// CHECK: %[[SIGMA:.*]] = f32[] parameter(1) +// CHECK: ROOT %[[RESULT:.*]] = f32[2,3,5] rng(f32[] %[[MU]], f32[] 
%[[SIGMA]]), distribution=rng_normal + +// ----- + // CHECK: HloModule func @main() -> tensor<2x3x5xf32> { %0 = xla_hlo.constant dense<0.000000e+00> : tensor @@ -599,6 +650,62 @@ func @main(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<10x12x12x64xf32>) -> te // ----- +// CHECK: HloModule +func @main(%arg: tensor<3x4xi32>, %token: !xla_hlo.token) -> !xla_hlo.token { + %0 = "xla_hlo.send"(%arg, %token) { + channel_id = { + handle = 5 : i64, + type = 2 : i64 // Device to host channel + }, + is_host_transfer = true + } : (tensor<3x4xi32>, !xla_hlo.token) -> !xla_hlo.token + return %0 : !xla_hlo.token +} + +// CHECK: ENTRY +// CHECK: [[ARG:%.*]] = s32[3,4] parameter(0) +// CHECK: [[TOKEN:%.*]] = token[] parameter(1) +// CHECK: [[SEND:%.*]] = (s32[3,4], u32[], token[]) send(s32[3,4] [[ARG]], token[] [[TOKEN]]), channel_id=5, is_host_transfer=true +// CHECK: ROOT +// CHECK-SAME: token[] send-done((s32[3,4], u32[], token[]) [[SEND]]), channel_id=5, is_host_transfer=true + +// ----- + +// CHECK: HloModule +func @main(%arg: tensor<3x4xi32>, %token: !xla_hlo.token) -> !xla_hlo.token { + %0 = "xla_hlo.send"(%arg, %token) { + channel_id = { + handle = 5 : i64, + type = 1 : i64 // Device to device channel + }, + is_host_transfer = false + } : (tensor<3x4xi32>, !xla_hlo.token) -> !xla_hlo.token + return %0 : !xla_hlo.token +} + +// CHECK: ENTRY +// CHECK: [[ARG:%.*]] = s32[3,4] parameter(0) +// CHECK: [[TOKEN:%.*]] = token[] parameter(1) +// CHECK: [[SEND:%.*]] = (s32[3,4], u32[], token[]) send(s32[3,4] [[ARG]], token[] [[TOKEN]]), channel_id=5 +// CHECK: ROOT +// CHECK-SAME: token[] send-done((s32[3,4], u32[], token[]) [[SEND]]), channel_id=5 + +// ----- + +// CHECK: HloModule +func @main(%arg: tensor<4x4xf32>, %size: tensor) -> tensor<4x4xf32> { + %0 = "xla_hlo.set_dimension_size"(%arg, %size) {dimension = 1 : i32} : (tensor<4x4xf32>, tensor) -> tensor<4x4xf32> + return %0 : tensor<4x4xf32> +} + +// CHECK: ENTRY +// CHECK: [[ARG:%.*]] = f32[4,4] parameter(0) +// CHECK: [[SIZE:%.*]] = s32[] parameter(1) +// CHECK: ROOT +// CHECK-SAME: f32[4,<=4] set-dimension-size(f32[4,4] [[ARG]], s32[] [[SIZE]]), dimensions={1} + +// ----- + // CHECK: HloModule func @main(%arg: tensor<3x4xi32>) -> tensor<1x2xi32> { %0 = "xla_hlo.slice"(%arg) {start_indices = dense<[1, 0]> : tensor<2xi64>, limit_indices = dense<[2, 4]> : tensor<2xi64>, strides = dense<[1, 2]> : tensor<2xi64>} : (tensor<3x4xi32>) -> tensor<1x2xi32> diff --git a/tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt b/tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt index 5f9670be2f1..b598a9b8852 100644 --- a/tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt +++ b/tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt @@ -95,6 +95,15 @@ ENTRY %dummy_main (Arg_0.1: f32[]) -> f32[] { ROOT %call.2 = s64[] call(%arg0.1), to_apply=%call } +// CHECK-LABEL: func @test_cholesky +// CHECK-SAME: ([[ARG:%.*]]: tensor<1x291x291xf32>) -> tensor<1x291x291xf32> +%test_cholesky (a: f32[1,291,291]) -> f32[1,291,291] { + %a = f32[1,291,291] parameter(0) + // CHECK-NEXT: "xla_hlo.cholesky"([[ARG]]) {lower = true, name = {{.*}}} : (tensor<1x291x291xf32>) -> tensor<1x291x291xf32> + ROOT %out = f32[1,291,291] cholesky(f32[1,291,291] %a), lower=true +} + + // CHECK-LABEL: func @test_clamp( %test_clamp (Arg_0.1: f32[], Arg_1.2: f32[4], Arg_1.3: f32[]) -> f32[4] { %Arg_0.1 = f32[] parameter(0) @@ -364,6 +373,16 @@ ENTRY %dummy_main (Arg_0.1: f32[]) -> f32[] { ROOT %imag.3 = f32[4] imag(c64[4] %Arg_0.1) } +// CHECK-LABEL: func @test_infeed +// 
CHECK-SAME: ([[TOKEN:%.*]]: !xla_hlo.token) -> tuple<tensor<3xi32>, !xla_hlo.token> { +%test_infeed (token0: token[]) -> (s32[3], token[]) { + %token0 = token[] parameter(0) + // CHECK-NEXT: "xla_hlo.infeed"([[TOKEN]]) + // CHECK-SAME: infeed_config = "foobar" + ROOT %infeed = (s32[3], token[]) infeed(token[] %token0), infeed_config="foobar" +} + + // CHECK-LABEL: func @test_iota_1() -> tensor<4xf32> { %test_iota_1 () -> f32[4] { // CHECK-NEXT: "xla_hlo.iota"() {iota_dimension = 0 : i64} : () -> tensor<4xf32> @@ -444,6 +463,16 @@ ENTRY %dummy_main (Arg_0.1: f32[]) -> f32[] { ROOT %or.3 = pred[4] or(pred[4] %Arg_0.1, pred[4] %Arg_1.2) } +// CHECK-LABEL: func @test_outfeed +// CHECK-SAME: ([[DATA:%.*]]: tensor<3xi32>, [[TOKEN:%.*]]: !xla_hlo.token) -> !xla_hlo.token { +%test_outfeed (Arg_0.1: s32[3], Arg_1.2: token[]) -> token[] { + %Arg_0.1 = s32[3] parameter(0) + %Arg_1.2 = token[] parameter(1) + // CHECK-NEXT: "xla_hlo.outfeed"([[DATA]], [[TOKEN]]) + // CHECK-SAME: outfeed_config = "foobar" + ROOT %outfeed.3 = token[] outfeed(s32[3] %Arg_0.1, token[] %Arg_1.2), outfeed_config="foobar" +} + // CHECK-LABEL: func @test_pad(%arg0: tensor<4xf32>, %arg1: tensor<f32>) -> tensor<4xf32> { %test_pad (Arg_0.1: f32[4], Arg_1.2: f32[]) -> f32[4] { %Arg_0.1 = f32[4] parameter(0) @@ -488,6 +517,26 @@ ENTRY %dummy_main (Arg_0.1: f32[]) -> f32[] { ROOT %power.3 = f32[4] power(f32[4] %Arg_0.1, f32[4] %Arg_1.2) } +// CHECK-LABEL: func @test_rng_normal +// CHECK-SAME: ([[ARG0:%.*]]: tensor<f32>, [[ARG1:%.*]]: tensor<f32>) -> tensor<2x3x5xf32> +%test_rng_normal (Arg_0.1: f32[], Arg_1.2: f32[]) -> f32[2,3,5] { + %Arg_0.1 = f32[] parameter(0) + %Arg_1.2 = f32[] parameter(1) + // CHECK: [[CST:%.*]] = constant dense<[2, 3, 5]> : tensor<3xi64> + // CHECK: "xla_hlo.rng_normal"([[ARG0]], [[ARG1]], [[CST]]) + ROOT %rng.4 = f32[2,3,5] rng(f32[] %Arg_0.1, f32[] %Arg_1.2), distribution=rng_normal +} + +// CHECK-LABEL: func @test_rng_uniform +// CHECK-SAME: ([[ARG0:%.*]]: tensor<f32>, [[ARG1:%.*]]: tensor<f32>) -> tensor<2x3x5xf32> +%test_rng_uniform (Arg_0.1: f32[], Arg_1.2: f32[]) -> f32[2,3,5] { + %Arg_0.1 = f32[] parameter(0) + %Arg_1.2 = f32[] parameter(1) + // CHECK: [[CST:%.*]] = constant dense<[2, 3, 5]> : tensor<3xi64> + // CHECK: "xla_hlo.rng_uniform"([[ARG0]], [[ARG1]], [[CST]]) + ROOT %rng.4 = f32[2,3,5] rng(f32[] %Arg_0.1, f32[] %Arg_1.2), distribution=rng_uniform +} + // CHECK-LABEL: func @test_real %test_real (Arg_0.1: c64[4]) -> f32[4] { %Arg_0.1 = c64[4] parameter(0) @@ -603,6 +652,15 @@ ENTRY %dummy_main (Arg_0.1: f32[]) -> f32[] { ROOT %select.4 = s32[2,3] select(pred[2,3] %Arg_0.1, s32[2,3] %Arg_1.2, s32[2,3] %Arg_2.3) } +// CHECK-LABEL: func @test_set_dimension_size +// CHECK-SAME: ([[ARG:%.*]]: tensor<4x4xf32>, [[SIZE:%.*]]: tensor<i32>) +%test_set_dimension_size (Arg_0.1: f32[4,4], Arg_1.2: s32[]) -> f32[4,<=4] { + %Arg_0.1 = f32[4,4] parameter(0) + %Arg_1.2 = s32[] parameter(1) + // CHECK-NEXT: "xla_hlo.set_dimension_size"([[ARG]], [[SIZE]]) {dimension = 1 : i32, name = "{{.*}}"} : (tensor<4x4xf32>, tensor<i32>) -> tensor<4x4xf32> + ROOT %set-dimension-size.2 = f32[4,<=4] set-dimension-size(f32[4,4] %Arg_0.1, s32[] %Arg_1.2), dimensions={1} +} + // CHECK-LABEL: func @test_sine(%arg0: tensor<1x16x16x3xf32>) -> tensor<1x16x16x3xf32> { %test_sine (arg0.1: f32[1,16,16,3]) -> f32[1,16,16,3] { %arg0.1 = f32[1,16,16,3]{3,2,1,0} parameter(0), metadata={op_name="HLO_Args"} diff --git a/tensorflow/compiler/mlir/xla/transforms/canonicalize.td b/tensorflow/compiler/mlir/xla/transforms/canonicalize.td index d510a3df994..df9be382f11 100644 ---
a/tensorflow/compiler/mlir/xla/transforms/canonicalize.td +++ b/tensorflow/compiler/mlir/xla/transforms/canonicalize.td @@ -29,7 +29,7 @@ def BuildSliceLimits : NativeCodeCall< def BuildSliceStrides : NativeCodeCall< "GetI64ElementsAttr(SmallVector(" - "$0->getType().cast().getRank(), 1), &$_builder)">; + "$0.getType().cast().getRank(), 1), &$_builder)">; def DynamicSliceToSlice: Pat<(HLO_DynamicSliceOp HLO_Tensor:$input, (HLO_ConstOp I64ElementsAttr:$starting_indices), diff --git a/tensorflow/compiler/mlir/xla/transforms/hlo_legalize_to_lhlo.cc b/tensorflow/compiler/mlir/xla/transforms/hlo_legalize_to_lhlo.cc index 4a74fe4b2ae..9170b217471 100644 --- a/tensorflow/compiler/mlir/xla/transforms/hlo_legalize_to_lhlo.cc +++ b/tensorflow/compiler/mlir/xla/transforms/hlo_legalize_to_lhlo.cc @@ -16,18 +16,18 @@ limitations under the License. // This file implements logic for lowering HLO dialect to LHLO dialect. #include "absl/memory/memory.h" -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/BlockAndValueMapping.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Transforms/DialectConversion.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/BlockAndValueMapping.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/PatternMatch.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Transforms/DialectConversion.h" // TF:llvm-project #include "tensorflow/compiler/mlir/xla/ir/hlo_ops.h" #include "tensorflow/compiler/mlir/xla/ir/lhlo_ops.h" #include "tensorflow/compiler/mlir/xla/transforms/passes.h" @@ -39,8 +39,8 @@ namespace { constexpr StringRef kTempBufferAttr = "temp"; -Value* GetTensorStoreOrReturnMemRef(Value* value) { - for (const auto& user : value->getUsers()) { +Value GetTensorStoreOrReturnMemRef(Value value) { + for (const auto& user : value.getUsers()) { if (auto tensor_store = dyn_cast(user)) { if (tensor_store.getOperand(0) == value) { return tensor_store.getOperand(1); @@ -56,9 +56,9 @@ Value* GetTensorStoreOrReturnMemRef(Value* value) { return nullptr; } -Operation* GetLastUse(Value* value) { - Operation* last = value->getDefiningOp(); - for (auto& user : value->getUses()) { +Operation* GetLastUse(Value value) { + Operation* last = value.getDefiningOp(); + for (auto& user : value.getUses()) { Operation* user_op = user.getOwner(); if (!user_op->isBeforeInBlock(last)) { last = user_op; @@ -67,9 +67,9 @@ Operation* GetLastUse(Value* value) { return last; } -Value* InsertAllocAndDealloc(Location loc, Value* result, - ConversionPatternRewriter* rewriter) { - auto result_type = result->getType().dyn_cast(); +Value 
InsertAllocAndDealloc(Location loc, Value result, + ConversionPatternRewriter* rewriter) { + auto result_type = result.getType().dyn_cast<ShapedType>(); if (!result_type || !result_type.hasStaticShape()) { emitError(loc, "tensor to buffer conversion expects statically shaped results"); @@ -79,7 +79,7 @@ Value* InsertAllocAndDealloc(Location loc, Value* result, Operation* last = GetLastUse(result); - Operation* op = result->getDefiningOp(); + Operation* op = result.getDefiningOp(); OpBuilder allocBuilder(op); auto alloc = allocBuilder.create<AllocOp>(loc, memref_type); alloc.setAttr(kTempBufferAttr, rewriter->getBoolAttr(true)); @@ -93,8 +93,8 @@ Value* InsertAllocAndDealloc(Location loc, Value* result, /// For every tensor-type value that is produced in the original function, /// this function returns the buffer that can be used in the converted /// function to store the values held in the tensor. -Value* GetBufferForResultValue(Location loc, Value* result, - ConversionPatternRewriter* rewriter) { +Value GetBufferForResultValue(Location loc, Value result, + ConversionPatternRewriter* rewriter) { if (auto existing_memref = GetTensorStoreOrReturnMemRef(result)) { return existing_memref; } @@ -108,7 +108,7 @@ class HloToLhloOpConverter : public ConversionPattern { : ConversionPattern(HloOpTy::getOperationName(), 1, context) {} PatternMatchResult matchAndRewrite( - Operation* op, ArrayRef<Value*> operands, + Operation* op, ArrayRef<Value> operands, ConversionPatternRewriter& rewriter) const final { if (op->getParentRegion()->getBlocks().size() != 1) { emitError(op->getLoc(), @@ -116,14 +116,14 @@ class HloToLhloOpConverter : public ConversionPattern { "region containing the operation"); } const auto& original_results = op->getResults(); - SmallVector<Value*, 4> buffer_args(operands.begin(), operands.end()); + SmallVector<Value, 4> buffer_args(operands.begin(), operands.end()); for (auto result : original_results) { buffer_args.push_back( GetBufferForResultValue(op->getLoc(), result, &rewriter)); } rewriter.create<LhloOpTy>(op->getLoc(), llvm::None, buffer_args, op->getAttrs()); - rewriter.replaceOp(op, ArrayRef<Value*>(buffer_args).slice(operands.size()), + rewriter.replaceOp(op, ArrayRef<Value>(buffer_args).slice(operands.size()), original_results); return matchSuccess(); } @@ -135,7 +135,7 @@ struct HloToLHloReduceConverter using OpConversionPattern::OpConversionPattern; PatternMatchResult matchAndRewrite( - xla_hlo::ReduceOp op, ArrayRef<Value*> operands, + xla_hlo::ReduceOp op, ArrayRef<Value> operands, ConversionPatternRewriter& rewriter) const final { auto loc = op.getLoc(); // TODO(b/137624192) Implement variadic reduce. @@ -146,7 +146,7 @@ struct HloToLHloReduceConverter "region containing the operation"); } const auto& original_results = op.getResults(); - SmallVector<Value*, 4> buffer_args(operands.begin(), operands.end()); + SmallVector<Value, 4> buffer_args(operands.begin(), operands.end()); for (auto result : original_results) { buffer_args.push_back(GetBufferForResultValue(loc, result, &rewriter)); } @@ -161,7 +161,7 @@ struct HloToLHloReduceConverter int original_arg_count = entry_block.getNumArguments(); for (int i = 0; i < original_arg_count; ++i) { auto old_arg = entry_block.getArgument(i); - auto old_type = old_arg->getType().cast<TensorType>(); + auto old_type = old_arg.getType().cast<TensorType>(); auto new_type = MemRefType::get(old_type.getShape(), old_type.getElementType()); auto new_arg = entry_block.addArgument(new_type); @@ -169,7 +169,7 @@ struct HloToLHloReduceConverter } // Add an argument for the result.
entry_block.addArgument( - entry_block.getArgument(original_arg_count)->getType()); + entry_block.getArgument(original_arg_count).getType()); // Remove the old arguments. for (int i = original_arg_count - 1; i >= 0; --i) { entry_block.eraseArgument(i); @@ -178,7 +178,7 @@ struct HloToLHloReduceConverter rewriter.setInsertionPointToEnd(&entry_block); rewriter.create(loc); - rewriter.replaceOp(op, ArrayRef(buffer_args).slice(operands.size()), + rewriter.replaceOp(op, ArrayRef(buffer_args).slice(operands.size()), original_results); return matchSuccess(); @@ -191,7 +191,7 @@ class HloToLhloTensorLoadConverter : public ConversionPattern { : ConversionPattern(TensorLoadOp::getOperationName(), 1, context) {} PatternMatchResult matchAndRewrite( - Operation* op, ArrayRef operands, + Operation* op, ArrayRef operands, ConversionPatternRewriter& rewriter) const final { rewriter.replaceOp(op, operands, op->getResults()); return matchSuccess(); @@ -205,7 +205,7 @@ class HloToLhloTensorStoreConverter : public ConversionPattern { : ConversionPattern(TensorStoreOp::getOperationName(), 1, context) {} PatternMatchResult matchAndRewrite( - Operation* op, ArrayRef operands, + Operation* op, ArrayRef operands, ConversionPatternRewriter& rewriter) const final { rewriter.eraseOp(op); return matchSuccess(); @@ -218,7 +218,7 @@ class HloToLhloReturnConverter : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; PatternMatchResult matchAndRewrite( - xla_hlo::ReturnOp op, ArrayRef operands, + xla_hlo::ReturnOp op, ArrayRef operands, ConversionPatternRewriter& rewriter) const final { rewriter.eraseOp(op); return matchSuccess(); diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_control_flow.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_control_flow.cc index 8a8afc01bec..8351f94d172 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_control_flow.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_control_flow.cc @@ -18,17 +18,17 @@ limitations under the License. 
#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Casting.h" -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Block.h" // TF:local_config_mlir -#include "mlir/IR/BlockAndValueMapping.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/TypeUtilities.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassRegistry.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Block.h" // TF:llvm-project +#include "mlir/IR/BlockAndValueMapping.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/PatternMatch.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/TypeUtilities.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassRegistry.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project #include "tensorflow/compiler/mlir/xla/ir/hlo_ops.h" #include "tensorflow/compiler/mlir/xla/transforms/passes.h" @@ -99,8 +99,8 @@ LogicalResult LowerConditionalOp(mlir::xla_hlo::ConditionalOp conditional_op) { mapper, &builder))) return failure(); - tail_block->addArguments(conditional_op.getResult()->getType()); - conditional_op.getResult()->replaceAllUsesWith(tail_block->getArgument(0)); + tail_block->addArguments(conditional_op.getResult().getType()); + conditional_op.getResult().replaceAllUsesWith(tail_block->getArgument(0)); op_inst->erase(); return success(); @@ -171,8 +171,8 @@ LogicalResult LowerWhileOp(mlir::xla_hlo::WhileOp while_op) { auto cond_value = builder.create(loc, return_value); // Get the body block arguments. - llvm::SmallVector successor_args(cond_block->args_begin(), - cond_block->args_end()); + llvm::SmallVector successor_args(cond_block->args_begin(), + cond_block->args_end()); builder.create(loc, cond_value, body_block, successor_args, tail_block, successor_args); @@ -201,7 +201,7 @@ LogicalResult LowerWhileOp(mlir::xla_hlo::WhileOp while_op) { // Erase the original while loop. tail_block->addArgument(while_op.getType()); - while_op.getResult()->replaceAllUsesWith(tail_block->getArgument(0)); + while_op.getResult().replaceAllUsesWith(tail_block->getArgument(0)); op_inst->erase(); return success(); diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc index eaed2da8fa7..9c58b242460 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc @@ -24,25 +24,26 @@ limitations under the License. 
#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Diagnostics.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Matchers.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/TypeUtilities.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Transforms/DialectConversion.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Diagnostics.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Matchers.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/PatternMatch.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/TypeUtilities.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Transforms/DialectConversion.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.h" #include "tensorflow/compiler/mlir/xla/convert_op_folder.h" #include "tensorflow/compiler/mlir/xla/ir/hlo_ops.h" #include "tensorflow/compiler/mlir/xla/ir/hlo_utils.h" #include "tensorflow/compiler/mlir/xla/transforms/passes.h" +#include "tensorflow/compiler/xla/client/padding.h" #include "tensorflow/core/framework/common_shape_fns.h" #include "tensorflow/core/kernels/conv_grad_shape_utils.h" #include "tensorflow/core/util/padding.h" @@ -54,25 +55,20 @@ namespace { class LegalizeTF : public FunctionPass { public: - struct Options : public PassOptions { - Option allow_partial_conversion{ - *this, "allow-partial-conversion", - llvm::cl::desc("Allow operations that can't be legalized."), - llvm::cl::init(false)}; - }; - - explicit LegalizeTF(bool allow_partial_conversion) - : FunctionPass(), - allow_partial_conversion_(allow_partial_conversion) {} - - explicit LegalizeTF(const Options &option) - : LegalizeTF(option.allow_partial_conversion) {} + LegalizeTF() = default; + LegalizeTF(const LegalizeTF &) {} + explicit LegalizeTF(bool allow_partial_conversion) { + allow_partial_conversion_ = allow_partial_conversion; + } /// Performs the lowering to XLA dialect. void runOnFunction() override; private: - bool allow_partial_conversion_; + Option allow_partial_conversion_{ + *this, "allow-partial-conversion", + llvm::cl::desc("Allow operations that can't be legalized."), + llvm::cl::init(false)}; }; /// Returns if the given TF data format string is the default format. @@ -126,7 +122,7 @@ static IntegerAttr GetHLOAxisFromTFAxis(IntegerAttr attr, int64_t rank, // corresponding to the tensorflow axis. In particular, the tensorflow axis can // be negative, in which case, the corresponding HLO axis is // (axis + rank-of-the-tensor). 
-static llvm::Optional<int64_t> GetIntegerHLOAxisFromTFAxis(Value *value, +static llvm::Optional<int64_t> GetIntegerHLOAxisFromTFAxis(Value value, int64_t rank) { DenseIntElementsAttr attrs; if (!matchPattern(value, m_Constant(&attrs)) || @@ -139,7 +135,7 @@ static llvm::Optional<int64_t> GetIntegerHLOAxisFromTFAxis(Value *value, /// Returns a `ConvertOp` that casts the elements to a i64 type while retaining /// the shape of the input value. -static ConvertOp CastValueToI64(Location loc, Value *value, +static ConvertOp CastValueToI64(Location loc, Value value, PatternRewriter *rewriter) { return rewriter->create<ConvertOp>(loc, value, rewriter->getIntegerType(64)); } @@ -223,14 +219,30 @@ static void BuildReduceBody(Type element_type, Region *body, builder->create<ReturnOp>(loc, reducer.getResult()); } +// Builds a region that takes two arguments and returns the second argument as +// the result. Corresponds to the function f(x, y) = y. +// Used in Scatter op's computation to update specific elements. +static void BuildBinaryAssignmentRegion(Type element_type, Region *region, + OpBuilder *builder) {} + +// Builds a set of operations for applying reduction on the input value. A +// tf.sum op is created and will be legalized to HLO ops automatically. +static Value ApplyReduction(Location loc, Value input, + DenseIntElementsAttr reduce_dims, + OpBuilder *builder) { + auto reduce_dims_op = builder->create<ConstOp>(loc, reduce_dims); + return builder->create<TF::SumOp>(loc, input, reduce_dims_op, + builder->getBoolAttr(false)); +} + //===----------------------------------------------------------------------===// // BatchNorm op utilities. //===----------------------------------------------------------------------===// static IntegerAttr getFeatureDimensionAttr(Builder &b, StringAttr format, - Value *input) { + Value input) { return b.getI64IntegerAttr( - getFeatureDimension(format, input->getType().cast<RankedTensorType>())); + getFeatureDimension(format, input.getType().cast<RankedTensorType>())); } //===----------------------------------------------------------------------===// @@ -241,8 +253,8 @@ static IntegerAttr getFeatureDimensionAttr(Builder &b, StringAttr format, // Requires input to have a ranked tensor type. static DenseIntElementsAttr getBiasFeatureDimension(Builder &b, StringAttr format, - Value *input) { - auto inputType = input->getType().cast<RankedTensorType>(); + Value input) { + auto inputType = input.getType().cast<RankedTensorType>(); size_t featureDim = getFeatureDimension(format, inputType); RankedTensorType type = RankedTensorType::get(1, b.getIntegerType(64)); return DenseIntElementsAttr::get(type, featureDim); @@ -306,9 +318,9 @@ static DenseIntElementsAttr GetInteriorPadding(ElementsAttr tf_padding) { // same shape, this broadcasts size 1 tensors up to any rank. Dynamic dimensions // must be broadcasted with a size 1 tensor or another dynamic dimension. // Returns false for unranked types.
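// For example (illustrative): tensor<1x4xf32> and tensor<3x4xf32> are broadcast compatible, since the size-1 dimension can broadcast up to 3; tensor<2x4xf32> and tensor<3x4xf32> are not, and any unranked operand makes the check return false.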
-static bool AreBroadcastCompatible(Value *x, Value *y) { - auto x_rankless = x->getType().dyn_cast(); - auto y_rankless = y->getType().dyn_cast(); +static bool AreBroadcastCompatible(Value x, Value y) { + auto x_rankless = x.getType().dyn_cast(); + auto y_rankless = y.getType().dyn_cast(); if (!x_rankless || !y_rankless) { return false; } @@ -387,16 +399,16 @@ static void BuildArgMinMaxReductionBody(Type input_element_type, Location loc = body->getLoc(); StringAttr compare_direction = StringAttr::get(direction, builder->getContext()); - Value *compare = builder->create( + Value compare = builder->create( loc, block->getArgument(0), block->getArgument(2), /*broadcast_dimensions=*/nullptr, compare_direction); - Value *selected_input = builder->create( + Value selected_input = builder->create( loc, input_type, compare, block->getArgument(0), block->getArgument(2)); - Value *selected_index = builder->create( + Value selected_index = builder->create( loc, index_type, compare, block->getArgument(1), block->getArgument(3)); - Value *return_values[] = {selected_input, selected_index}; + Value return_values[] = {selected_input, selected_index}; builder->create(loc, return_values); } @@ -404,9 +416,9 @@ static void BuildArgMinMaxReductionBody(Type input_element_type, // Slice op utilities. //===----------------------------------------------------------------------===// -static bool CanBeTranslatedToDynamicSlice(Value *input, Value *start_indices, +static bool CanBeTranslatedToDynamicSlice(Value input, Value start_indices, DenseIntElementsAttr slice_sizes) { - auto input_ty = input->getType().dyn_cast(); + auto input_ty = input.getType().dyn_cast(); int64_t input_rank = input_ty.getRank(); ArrayRef input_shape = input_ty.getShape(); DenseIntElementsAttr constant_start_indices; @@ -445,7 +457,7 @@ static bool CanBeTranslatedToDynamicSlice(Value *input, Value *start_indices, // the end. HLO slice size can't be -1. As such, we need to translate TF slice // size -1 to HLO slice size. 
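// A worked example (illustrative): for an input of shape [4, 8] with start_indices [1, 2] and TF slice_sizes [-1, 3], the normalized HLO slice sizes are [3, 3]; the -1 is replaced by the remaining extent of dimension 0, i.e. 4 - 1 = 3.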
static DenseIntElementsAttr TFSliceSizes2HLOSliceSizes( - Value *input, Value *start_indices, DenseIntElementsAttr slice_sizes, + Value input, Value start_indices, DenseIntElementsAttr slice_sizes, Builder *builder) { DenseIntElementsAttr constant_start_indices; if (!matchPattern(start_indices, m_Constant(&constant_start_indices))) { @@ -453,7 +465,7 @@ static DenseIntElementsAttr TFSliceSizes2HLOSliceSizes( .cast(); } - auto input_ty = input->getType().dyn_cast(); + auto input_ty = input.getType().dyn_cast(); int64_t input_rank = input_ty.getRank(); ArrayRef input_shape = input_ty.getShape(); SmallVector normalized_sizes; @@ -495,7 +507,7 @@ static void BuildSortComparisonBody(llvm::ArrayRef element_types, Location loc = body->getLoc(); StringAttr compare_direction = StringAttr::get(direction, builder->getContext()); - Value *compare = builder->create( + Value compare = builder->create( loc, block->getArgument(0), block->getArgument(1), /*broadcast_dimensions=*/nullptr, compare_direction); @@ -562,9 +574,9 @@ class ConvertConv : public OpRewritePattern { std::string data_format = op.data_format().str(); if (!FormatFromString(data_format, &format)) return Pattern::matchFailure(); - auto input_ty = op.input()->getType().template dyn_cast(); + auto input_ty = op.input().getType().template dyn_cast(); auto filter_ty = - op.filter()->getType().template dyn_cast(); + op.filter().getType().template dyn_cast(); auto result_ty = op.getType().template dyn_cast(); // Input, filter and the result needs to have static shape for calculation @@ -654,7 +666,7 @@ class ConvertConv : public OpRewritePattern { auto paddings_attr = rewriter.getNamedAttr( "padding", DenseElementsAttr::get(paddings_ty, paddings)); - SmallVector operands(op.getOperands()); + SmallVector operands(op.getOperands()); NamedAttribute attrs[] = {rhs_dilations_attr, window_strides_attr, dimension_numbers_attr, feature_group_count_attr, batch_group_count_attr, paddings_attr}; @@ -686,10 +698,10 @@ class ConvertBF16FloorDivOp : public OpRewritePattern { PatternRewriter &rewriter) const override { auto l = op.x(); auto r = op.y(); - auto element_type = getElementTypeOrSelf(l->getType()); + auto element_type = getElementTypeOrSelf(l.getType()); if (!element_type.isBF16()) return matchFailure(); - auto out_type = op.z()->getType().cast(); + auto out_type = op.z().getType().cast(); l = rewriter.create(op.getLoc(), l, rewriter.getF32Type()); r = rewriter.create(op.getLoc(), r, rewriter.getF32Type()); @@ -731,6 +743,263 @@ class ConvertEinsumOp : public OpRewritePattern { } }; +// The base class to convert TensorFlow FusedBatchNormGrad*Op to HLO +// BatchNormGradOp for training and a sequence of binary ops for inference. +// TODO(b/145536565): move to legalize_tf_patterns.td if it applies. +template +class ConvertFusedBatchNormGradBase + : public OpRewritePattern { + public: + using OpRewritePattern::OpRewritePattern; + + PatternMatchResult matchAndRewrite(FusedBatchNormGradOpT op, + PatternRewriter &rewriter) const override { + Location loc = op.getLoc(); + Value grad = op.y_backprop(); + Value act = op.x(); + Value scale = op.scale(); + Value mean = op.reserve_space_1(); + Value var = op.reserve_space_2(); + + // TODO(b/141785544): Update this to not require static shapes. + // activation shape needs to be static to convert negative indices in + // TensorFlow to absolute indices required by HLO. 
+ RankedTensorType act_type = + act.getType().template dyn_cast<RankedTensorType>(); + if (!act_type) return Pattern::matchFailure(); + Type act_ele_type = act_type.getElementType(); + // To support mixed precision, the statistics type, which may be more + // precise than the input types, is used for this op. + Type kernel_type = + scale.getType().template cast<TensorType>().getElementType(); + grad = rewriter.create<ConvertOp>(loc, grad, kernel_type); + act = rewriter.create<ConvertOp>(loc, act, kernel_type); + + auto feature_dim_attr = + getFeatureDimensionAttr(rewriter, op.data_formatAttr(), act); + auto feature_dim = feature_dim_attr.getValue().getSExtValue(); + + // Gets the result values. + Value x_backprop, scale_backprop, offset_backprop; + if (op.is_training()) { // training + // TODO(b/145536565): handle GPU logic separately. + // Infers the output type with the converted `act`. + Type feature_type = RankedTensorType::get( + {GetDimSize(act_type, feature_dim)}, kernel_type); + Type result_type = TupleType::get( + {act.getType(), feature_type, feature_type}, rewriter.getContext()); + + auto training_op = rewriter.create<BatchNormGradOp>( + loc, result_type, act, scale, mean, var, grad, op.epsilon(), + feature_dim_attr.getValue()); + + x_backprop = + rewriter.create<GetTupleElementOp>(loc, training_op.getResult(), 0); + + scale_backprop = + rewriter.create<GetTupleElementOp>(loc, training_op.getResult(), 1); + + offset_backprop = + rewriter.create<GetTupleElementOp>(loc, training_op.getResult(), 2); + } else { // inference + SmallVector<int64_t, 4> non_feature_dims; + for (int64_t i = 0; i < act_type.getRank(); ++i) { + if (i == feature_dim) continue; + non_feature_dims.push_back(i); + } + auto reduce_dims = GetI64ElementsAttr(non_feature_dims, &rewriter); + auto broadcast_dims = GetI64ElementsAttr({feature_dim}, &rewriter); + auto no_broadcast_dims = GetI64ElementsAttr({}, &rewriter); + + // scratch1 = rsqrt(var + epsilon) + RankedTensorType scalar_float = RankedTensorType::get({}, kernel_type); + auto epsilon = rewriter.create<ConstOp>( + loc, DenseFPElementsAttr::get(scalar_float, {op.epsilon()})); + auto add_op = rewriter.create<AddOp>(loc, var, epsilon.getResult(), + no_broadcast_dims); + Value scratch1 = rewriter.create<RsqrtOp>(loc, add_op); + + // scratch2 = sum(y_backprop * (x - mean)) + auto sub_op = rewriter.create<SubOp>(loc, act, mean, broadcast_dims); + auto weighted_grad = + rewriter.create<MulOp>(loc, grad, sub_op, no_broadcast_dims); + Value scratch2 = + ApplyReduction(loc, weighted_grad, reduce_dims, &rewriter); + + // x_backprop = y_backprop * (scale * scratch1) + auto scaled_grad = + rewriter.create<MulOp>(loc, op.scale(), scratch1, no_broadcast_dims); + x_backprop = + rewriter.create<MulOp>(loc, grad, scaled_grad, broadcast_dims); + + // scale_backprop = scratch2 * scratch1 + scale_backprop = + rewriter.create<MulOp>(loc, scratch1, scratch2, no_broadcast_dims); + + // offset_backprop = sum(y_backprop) + offset_backprop = ApplyReduction(loc, grad, reduce_dims, &rewriter); + } + + x_backprop = rewriter.create<ConvertOp>(loc, x_backprop, act_ele_type); + // It doesn't matter what values we provide for the last 2 results.
+ rewriter.replaceOp(op, + {/*x_backprop=*/x_backprop, + /*scale_backprop=*/scale_backprop, + /*offset_backprop=*/offset_backprop, op.x(), op.x()}); + return Pattern::matchSuccess(); + } +}; + +using ConvertFusedBatchNormGradOp = + ConvertFusedBatchNormGradBase; +using ConvertFusedBatchNormGradV2Op = + ConvertFusedBatchNormGradBase; +using ConvertFusedBatchNormGradV3Op = + ConvertFusedBatchNormGradBase; + +// Converts TensorFlow FusedBatchNormV3Op to either HLO BatchNormTrainingOp or +// HLO BatchNormInferenceOp, depending on the value of the 'is_training' +// parameter. +class ConvertFusedBatchNormV3Op + : public OpRewritePattern { + public: + using OpRewritePattern::OpRewritePattern; + + PatternMatchResult matchAndRewrite(TF::FusedBatchNormV3Op op, + PatternRewriter &rewriter) const override { + auto feature_dim = + getFeatureDimensionAttr(rewriter, op.data_formatAttr(), op.x()); + + auto input_type_tensor = op.x().getType().dyn_cast(); + auto input_element_type = input_type_tensor.getElementType(); + + auto scale_type_tensor = op.scale().getType().dyn_cast(); + auto scale_element_type = scale_type_tensor.getElementType(); + + // TODO(b/69928690): Support mixed precision in the XLA batch + // normalization operators. As a workaround, create a new x with the same + // element type as scale (which may be more precise than the input type). + Value bn_train_input = rewriter.create( + op.getLoc(), op.x(), scale_element_type); + TensorType bn_train_input_type_tensor = + bn_train_input.getType().cast(); + + if (op.is_training()) { + // Training case. + auto operand_shape = bn_train_input_type_tensor.getShape(); + // The mean and variance are each 1 dimensional arrays the size of the + // feature dimension, with the same element type as the operand (x). + // This shape must be constructed manually because the mean and variance + // inputs are empty in the training case. + Type mean_var_type = RankedTensorType::get( + {operand_shape[feature_dim.getInt()]}, scale_element_type); + // Op result type is a tuple of 3 values: output with same shape as input; + // batch_mean, and batch_var. + SmallVector operand_types = {bn_train_input_type_tensor, + mean_var_type, mean_var_type}; + Type result_type = TupleType::get(operand_types, rewriter.getContext()); + + auto bn_train_op = rewriter.create( + op.getLoc(), result_type, bn_train_input, op.scale(), op.offset(), + op.epsilon(), feature_dim.getValue()); + // HLO op outputs a tuple of tensors. Extract those results. + auto bn_train_op_result = bn_train_op.getResult(); + Value y_out = rewriter.create( + op.getLoc(), bn_train_op_result, 0); + Value batch_mean = rewriter.create( + op.getLoc(), bn_train_op_result, 1); + Value batch_variance = rewriter.create( + op.getLoc(), bn_train_op_result, 2); + + // Apply Bessel's correction on the variance. + int total_input_size = bn_train_input_type_tensor.getNumElements(); + int total_scale_size = scale_type_tensor.getNumElements(); + int sample_size = total_input_size / total_scale_size; + int sample_size_minus_one = std::max(1, sample_size - 1); + double factor = static_cast(sample_size) / + static_cast(sample_size_minus_one); + auto factor_const_op = rewriter.create( + op.getLoc(), rewriter.getFloatAttr(scale_element_type, factor)); + + auto corrected_variance = rewriter.create( + op.getLoc(), batch_variance.getType(), batch_variance, + factor_const_op, /*DenseIntElementsAttr=*/DenseIntElementsAttr()); + + // Convert back to input type to stay aligned with expected output type + // for TF op. 
+ y_out = rewriter.create<ConvertOp>(op.getLoc(), y_out, + input_element_type); + + // TF FusedBatchNormV3 op expects 6 outputs. Outputs 3 and 4 are + // currently marked as "reserved spaces 1 and 2". They are used to + // pass the per-batch mean and variance to the gradient. Here we + // maintain the same behavior by setting them to the mean and variance + // calculated by BatchNormTraining. Output 5 is unused; it doesn't + // matter what we pass there. + rewriter.replaceOp(op, {y_out, /*batch_mean=*/batch_mean, + /*batch_variance=*/corrected_variance, + /*reserve_space_1=*/batch_mean, + /*reserve_space_2=*/corrected_variance, + /*reserve_space_3=*/op.x()}); + } else { // Inference case. + auto bn_train_op = rewriter.create<BatchNormInferenceOp>( + op.getLoc(), + /*result_type=*/bn_train_input_type_tensor, bn_train_input, + op.scale(), op.offset(), op.mean(), op.variance(), op.epsilon(), + feature_dim.getValue()); + + // Convert back to input type to stay aligned with expected output type + // for TF op. + auto y_out = rewriter.create<ConvertOp>(op.getLoc(), bn_train_op, + input_element_type); + + // The mean, variance, and reserved space outputs of the batch norm op are + // not used for inference. It doesn't matter what values we provide for + // the last 5 results. + rewriter.replaceOp( + op, {/*y=*/y_out, /*batch_mean=*/op.x(), + /*batch_variance=*/op.x(), /*reserve_space_1=*/op.x(), + /*reserve_space_2=*/op.x(), /*reserve_space_3=*/op.x()}); + } + return Pattern::matchSuccess(); + } +}; + +// Returns padding attribute for ReduceWindow op with given params. +// +// Requires padding to be either 'SAME' or 'VALID' and the number of input +// dimensions to be equal to the size of window dimensions and window strides. +static DenseIntElementsAttr GetReduceWindowPadding( + llvm::ArrayRef<int64_t> input_dims, ArrayAttr window_dims, + ArrayAttr window_strides, StringRef padding, Builder *builder) { + if (padding == "VALID") return {}; + DCHECK_EQ(padding.str(), "SAME"); + + llvm::SmallVector<int64_t, 4> input_shape, window_shape, strides; + input_shape.reserve(input_dims.size()); + window_shape.reserve(window_dims.size()); + strides.reserve(window_strides.size()); + + for (const auto &dim : input_dims) input_shape.push_back(dim); + for (Attribute attr : window_dims) + window_shape.push_back(attr.cast<IntegerAttr>().getInt()); + for (Attribute attr : window_strides) + strides.push_back(attr.cast<IntegerAttr>().getInt()); + + std::vector<std::pair<int64_t, int64_t>> paddings = + ::xla::MakePadding(input_shape, window_shape, strides, + ::xla::Padding::kSame); + int64_t rank = paddings.size(); + llvm::SmallVector<int64_t, 8> flatten_paddings(rank * 2); + for (int i = 0; i < rank; i++) { + flatten_paddings[i] = paddings[i].first; + flatten_paddings[rank + i] = paddings[i].second; + } + return DenseIntElementsAttr::get( + RankedTensorType::get({2, rank}, builder->getIntegerType(64)), + flatten_paddings); +} + // Converts MaxPool op to HLO ReduceWindow op by setting appropriate window // dimensions with max as the reduction function. // @@ -746,21 +1015,21 @@ class ConvertMaxPoolOp : public OpRewritePattern<TF::MaxPoolOp> { PatternMatchResult matchAndRewrite(TF::MaxPoolOp op, PatternRewriter &rewriter) const override { - // TODO(hinsu): Support 'SAME' padding mode.
- if (op.padding() != "VALID") return matchFailure(); - Type element_type = - op.input()->getType().cast().getElementType(); + op.input().getType().cast().getElementType(); if (!element_type.isIntOrFloat()) return matchFailure(); Location loc = op.getLoc(); ConstOp init = GetMinValueForType(element_type, loc, &rewriter); + auto input_ty = op.input().getType().dyn_cast(); + if (!input_ty) return matchFailure(); + DenseIntElementsAttr paddings_attr = GetReduceWindowPadding( + input_ty.getShape(), op.ksize(), op.strides(), op.padding(), &rewriter); auto reduce = rewriter.create( loc, op.getType(), op.input(), init.getResult(), GetI64ElementsAttr(op.ksize()), GetI64ElementsAttr(op.strides()), /*base_dilations=*/DenseIntElementsAttr(), - /*window_dilations=*/DenseIntElementsAttr(), - /*paddings=*/DenseIntElementsAttr()); + /*window_dilations=*/DenseIntElementsAttr(), paddings_attr); BuildReduceBody(element_type, &reduce.body(), &rewriter); rewriter.replaceOp(op, reduce.getResult()); @@ -798,9 +1067,9 @@ class ConvertSigmoidOp : public OpRewritePattern { auto scalar_one = rewriter.create( op.getLoc(), - rewriter.getFloatAttr(getElementTypeOrSelf(operand->getType()), 0.5)); + rewriter.getFloatAttr(getElementTypeOrSelf(operand.getType()), 0.5)); - auto shaped_type = operand->getType().cast(); + auto shaped_type = operand.getType().cast(); auto constant_ones = rewriter.create( op.getLoc(), shaped_type, scalar_one, DenseIntElementsAttr::get( @@ -811,7 +1080,7 @@ class ConvertSigmoidOp : public OpRewritePattern { auto scaled_input = rewriter.create( op.getLoc(), operand, constant_ones, DenseIntElementsAttr()); auto tanh_op = - rewriter.create(op.getLoc(), operand->getType(), scaled_input); + rewriter.create(op.getLoc(), operand.getType(), scaled_input); auto mul_op = rewriter.create(op.getLoc(), tanh_op, constant_ones, /*DenseIntElementsAttr=*/DenseIntElementsAttr()); @@ -856,11 +1125,11 @@ class ConvertSoftmaxOp : public OpRewritePattern { PatternMatchResult matchAndRewrite(OpTy op, PatternRewriter &rewriter) const override { - Value *logits = op.logits(); + Value logits = op.logits(); // Softmax converter requires ranked type because the XLA reduce ops used // while lowering requires dimensions attribute to reduce along. - RankedTensorType type = logits->getType().dyn_cast(); + RankedTensorType type = logits.getType().dyn_cast(); if (!type) return Pattern::matchFailure(); auto loc = op.getLoc(); @@ -886,16 +1155,16 @@ class ConvertSoftmaxOp : public OpRewritePattern { rewriter.create(loc, type, logits, max_logits, batch_dims); // Exponentiate the inputs. - Value *exp = rewriter.create(loc, type, shifted_logits); + Value exp = rewriter.create(loc, type, shifted_logits); // Compute summation of the exponentials. 
auto exp_sum = rewriter.create(loc, exp, reduce_dim, /*keep_dims=*/rewriter.getBoolAttr(false)); - Value *sum = exp_sum.getResult(); + Value sum = exp_sum.getResult(); if (use_log) { - Value *log = rewriter.create(loc, sum); + Value log = rewriter.create(loc, sum); rewriter.replaceOpWithNewOp(op, shifted_logits, log, batch_dims); } else { rewriter.replaceOpWithNewOp(op, exp, sum, batch_dims); @@ -932,12 +1201,12 @@ class ConvertSizeOp : public OpRewritePattern { PatternMatchResult matchAndRewrite(TF::SizeOp op, PatternRewriter &rewriter) const override { - Value *input = op.input(); - auto input_ty = input->getType().dyn_cast(); + Value input = op.input(); + auto input_ty = input.getType().dyn_cast(); if (!input_ty) return Pattern::matchFailure(); const int64_t rank = input_ty.getRank(); - auto result_type = op.getResult()->getType(); + auto result_type = op.getResult().getType(); Operation *size = GetScalarConstOfType(result_type.cast().getElementType(), op.getLoc(), 1, &rewriter); @@ -995,7 +1264,7 @@ class ConvertSplitOp : public OpRewritePattern { PatternMatchResult matchAndRewrite(TF::SplitOp op, PatternRewriter &rewriter) const override { // We can only split along static dimensions. - auto input_type = op.value()->getType().dyn_cast(); + auto input_type = op.value().getType().dyn_cast(); if (!input_type) return matchFailure(); // We can only match when the split dimension is a constant scalar. @@ -1029,7 +1298,7 @@ class ConvertSplitOp : public OpRewritePattern { SmallVector strides(input_rank, 1); // All HLO slice results used to replace the original tf.Split op. - SmallVector slices; + SmallVector slices; slices.reserve(num_splits); for (int i = 0; i < num_splits; ++i) { @@ -1087,7 +1356,7 @@ class ConvertSplitVOp : public OpRewritePattern { PatternRewriter &rewriter) const override { // We can only split along static dimensions. // TODO(b/145731001): enhance to support dynamic-shaped inputs. - auto input_type = op.value()->getType().dyn_cast(); + auto input_type = op.value().getType().dyn_cast(); if (!input_type) return matchFailure(); // We can only match when the split dimension is a constant scalar. @@ -1141,7 +1410,7 @@ class ConvertSplitVOp : public OpRewritePattern { SmallVector strides(input_rank, 1); // All HLO slice results used to replace the original tf.Split op. - SmallVector slices; + SmallVector slices; slices.reserve(op.getNumResults()); for (int i = 0; i < op.getNumResults(); ++i) { @@ -1184,7 +1453,7 @@ class ConvertStridedSliceOp : public OpRewritePattern { // // TODO(hinsu): Relax this constraint for ops without negative indices and // strides. - auto input_ty = op.input()->getType().dyn_cast(); + auto input_ty = op.input().getType().dyn_cast(); if (!input_ty || !input_ty.hasStaticShape()) return matchFailure(); ArrayRef input_shape = input_ty.getShape(); @@ -1195,20 +1464,9 @@ class ConvertStridedSliceOp : public OpRewritePattern { auto result_ty = op.getType().dyn_cast(); if (!result_ty || !result_ty.hasStaticShape()) return matchFailure(); - // TODO(hinsu): Support non-zero mask values. Currently only - // 'shrink_axis_mask' is supported. - for (StringRef mask : - {"begin_mask", "end_mask", "ellipsis_mask", "new_axis_mask"}) { - auto attr = op.getAttrOfType(mask); - if (attr && attr.getValue() != 0) return matchFailure(); - } - - // TODO(hinsu): Support lowering for ops with dynamic begin and end values - // when it is possible to derive indices based on mask attributes. 
- DenseIntElementsAttr begin_indices, end_indices, strides; - if (!matchPattern(op.begin(), m_Constant(&begin_indices)) || - !matchPattern(op.end(), m_Constant(&end_indices)) || - !matchPattern(op.strides(), m_Constant(&strides))) + SmallVector<int64_t, 4> begin_indices, end_indices, strides; + if (!op.GetSlicedBoundRanges(input_shape, &begin_indices, &end_indices, + &strides)) return matchFailure(); SmallVector<int64_t, 4> hlo_begin_indices, hlo_end_indices, hlo_strides, @@ -1218,18 +1476,15 @@ class ConvertStridedSliceOp : public OpRewritePattern<TF::StridedSliceOp> { hlo_end_indices.reserve(input_rank); hlo_strides.reserve(input_rank); - int64_t indices_elements = begin_indices.getNumElements(); + int64_t indices_elements = begin_indices.size(); if (input_rank < indices_elements) return matchFailure(); // Convert from TensorFlow negative or out of range indices and strides // values to legal HLO Slice attributes. for (int i = 0, e = indices_elements; i != e; i++) { - int64_t begin = begin_indices.getValue<IntegerAttr>(i).getInt(); - int64_t end = end_indices.getValue<IntegerAttr>(i).getInt(); - int64_t stride = strides.getValue<IntegerAttr>(i).getInt(); - - if (begin < 0) begin = input_shape[i] + begin; - if (end < 0) end = input_shape[i] + end; + int64_t begin = begin_indices[i]; + int64_t end = end_indices[i]; + int64_t stride = strides[i]; if (stride < 0) { // Negative stride means that the output values are computed starting @@ -1297,8 +1552,8 @@ class ConvertStridedSliceGradOp &strides)) return matchFailure(); - Value *grad = op.dy(); - Type element_type = grad->getType().cast<ShapedType>().getElementType(); + Value grad = op.dy(); + Type element_type = grad.getType().cast<ShapedType>().getElementType(); // Perform reshape to undo any new/shrink axes done by strided slice. grad = rewriter.create<ReshapeOp>( @@ -1338,7 +1593,7 @@ class ConvertStridedSliceGradOp if (!dims_to_reverse.empty()) { grad = rewriter.create<ReverseOp>( - op.getLoc(), grad->getType(), grad, + op.getLoc(), grad.getType(), grad, GetI64ElementsAttr(dims_to_reverse, &rewriter)); } @@ -1376,7 +1631,7 @@ class ConvertRangeOp : public OpRewritePattern<TF::RangeOp> { PatternMatchResult matchAndRewrite(TF::RangeOp op, PatternRewriter &rewriter) const override { auto result = op.getResult(); - auto result_type = result->getType(); + auto result_type = result.getType(); if (!result_type.cast<ShapedType>().hasStaticShape()) { return matchFailure(); } @@ -1408,7 +1663,7 @@ class GenericConvertReductionOp : public OpRewritePattern<OpTy> { // TODO(b/141785544): Update this to not require static shapes. // Input shape needs to be static to convert negative indices in TensorFlow // to absolute indices required by HLO. - auto input_ty = op.input()->getType().template dyn_cast<RankedTensorType>(); + auto input_ty = op.input().getType().template dyn_cast<RankedTensorType>(); if (!input_ty) return this->matchFailure(); ArrayRef<int64_t> input_shape = input_ty.getShape(); @@ -1439,14 +1694,14 @@ class GenericConvertReductionOp : public OpRewritePattern<OpTy> { rewriter.create<ConvertOp>(loc, op.input(), reduce_element_type); // Each reduction op can have a different initial value. - Value *init = Derived::GetInitialValue(reduce_element_type, loc, rewriter); + Value init = Derived::GetInitialValue(reduce_element_type, loc, rewriter); auto reduction = rewriter.create<ReduceOp>( loc, casted_input.getResult(), init, GetI64ElementsAttr(xla_dimensions, &rewriter)); BuildReduceBody<ReductionOp>(reduce_element_type, &reduction.body(), &rewriter); - Value *result = reduction.getResult(0); + Value result = reduction.getResult(0); // The mean op needs to divide by the product of the reduced dimensions.
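// For example (illustrative): taking the mean of a f32[8,16] input over both dimensions divides the reduced sum by 8 * 16 = 128.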
if (std::is_same::value) { @@ -1490,8 +1745,8 @@ class ConvertMeanOp : public GenericConvertReductionOp { public: using GenericConvertReductionOp::GenericConvertReductionOp; - static Value *GetInitialValue(Type reduce_element_type, Location loc, - PatternRewriter &rewriter) { + static Value GetInitialValue(Type reduce_element_type, Location loc, + PatternRewriter &rewriter) { return GetScalarConstOfType(reduce_element_type, loc, 0, &rewriter); } }; @@ -1506,8 +1761,8 @@ class ConvertSumOp public: using GenericConvertReductionOp::GenericConvertReductionOp; - static Value *GetInitialValue(Type reduce_element_type, Location loc, - PatternRewriter &rewriter) { + static Value GetInitialValue(Type reduce_element_type, Location loc, + PatternRewriter &rewriter) { return GetScalarConstOfType(reduce_element_type, loc, 0, &rewriter); } }; @@ -1523,8 +1778,8 @@ class ConvertMaxOp public: using GenericConvertReductionOp::GenericConvertReductionOp; - static Value *GetInitialValue(Type reduce_element_type, Location loc, - PatternRewriter &rewriter) { + static Value GetInitialValue(Type reduce_element_type, Location loc, + PatternRewriter &rewriter) { return GetMinValueForType(reduce_element_type, loc, &rewriter); } }; @@ -1538,8 +1793,8 @@ class ConvertAllOp : public GenericConvertReductionOp { public: using GenericConvertReductionOp::GenericConvertReductionOp; - static Value *GetInitialValue(Type reduce_element_type, Location loc, - PatternRewriter &rewriter) { + static Value GetInitialValue(Type reduce_element_type, Location loc, + PatternRewriter &rewriter) { return GetScalarConstOfType(reduce_element_type, loc, 1, &rewriter); } }; @@ -1553,8 +1808,8 @@ class ConvertAnyOp : public GenericConvertReductionOp { public: using GenericConvertReductionOp::GenericConvertReductionOp; - static Value *GetInitialValue(Type reduce_element_type, Location loc, - PatternRewriter &rewriter) { + static Value GetInitialValue(Type reduce_element_type, Location loc, + PatternRewriter &rewriter) { return GetScalarConstOfType(reduce_element_type, loc, 0, &rewriter); } }; @@ -1571,7 +1826,7 @@ class ConvertArgMinMaxOp : public OpRewritePattern { PatternMatchResult matchAndRewrite(OpTy op, PatternRewriter &rewriter) const override { RankedTensorType input_type = - op.input()->getType().template dyn_cast(); + op.input().getType().template dyn_cast(); if (!input_type) { return this->matchFailure(); } @@ -1582,17 +1837,17 @@ class ConvertArgMinMaxOp : public OpRewritePattern { if (!input_element_type.isIntOrFloat()) return this->matchFailure(); Location loc = op.getLoc(); - Value *init_value = + Value init_value = Derived::GetInitialValue(input_element_type, loc, rewriter); RankedTensorType output_type = - op.output()->getType().template dyn_cast(); + op.output().getType().template dyn_cast(); if (!output_type) { return this->matchFailure(); } Type index_element_type = output_type.getElementType(); - Value *index_init_value = + Value index_init_value = GetScalarConstOfType(index_element_type, loc, 0, &rewriter); RankedTensorType index_type = @@ -1607,21 +1862,21 @@ class ConvertArgMinMaxOp : public OpRewritePattern { IntegerAttr iota_dimension = IntegerAttr::get(rewriter.getIntegerType(64), axis); - Value *index_values = + Value index_values = rewriter.create(loc, index_type, iota_dimension); std::vector dimensions = input_type.getShape(); dimensions.erase(dimensions.begin() + axis); ArrayRef reduction_result_shape(dimensions); - Value *operands[] = {op.input(), index_values}; - Value *init_values[] = {init_value, 
index_init_value}; + Value operands[] = {op.input(), index_values}; + Value init_values[] = {init_value, index_init_value}; DenseIntElementsAttr reduction_dimensions = GetI64ElementsAttr({axis}, &rewriter); auto reduction = rewriter.create<ReduceOp>( - loc, llvm::ArrayRef<Value *>(operands), - llvm::ArrayRef<Value *>(init_values), reduction_dimensions); + loc, llvm::ArrayRef<Value>(operands), + llvm::ArrayRef<Value>(init_values), reduction_dimensions); StringRef direction = Derived::GetDirection(); BuildArgMinMaxReductionBody(input_element_type, index_element_type, direction, &reduction.body(), &rewriter); @@ -1643,14 +1898,70 @@ class ConvertArgMaxOp public: using ConvertArgMinMaxOp::ConvertArgMinMaxOp; - static Value *GetInitialValue(Type reduce_element_type, Location loc, - PatternRewriter &rewriter) { + static Value GetInitialValue(Type reduce_element_type, Location loc, + PatternRewriter &rewriter) { return GetMinValueForType(reduce_element_type, loc, &rewriter); } static StringRef GetDirection() { return "GT"; } }; +// Converts TF TensorScatterUpdate op into Scatter Op with assignment: +// +// %result = "xla_hlo.scatter"(%tensor, %indices, %updates) +// { dimensions = ... } +// +class ConvertTensorScatterUpdateOp + : public OpRewritePattern<TF::TensorScatterUpdateOp> { + public: + using OpRewritePattern::OpRewritePattern; + + PatternMatchResult matchAndRewrite(TF::TensorScatterUpdateOp op, + PatternRewriter &rewriter) const override { + auto tensor_ty = op.tensor().getType().dyn_cast<RankedTensorType>(); + auto indices_ty = op.indices().getType().dyn_cast<RankedTensorType>(); + auto updates_ty = op.updates().getType().dyn_cast<RankedTensorType>(); + + if (!tensor_ty || !indices_ty || !updates_ty) return matchFailure(); + // Last dimension of the indices needs to be known at compile time for + // computation of the 'update_window_dims' attribute in the dimensions + // struct. + int64_t num_index_dims = indices_ty.getShape().back(); + if (ShapedType::isDynamic(num_index_dims)) return matchFailure(); + + int64_t tensor_rank = tensor_ty.getRank(); + int64_t indices_rank = indices_ty.getRank(); + int64_t updates_rank = updates_ty.getRank(); + + int64_t window_dims = tensor_rank - num_index_dims; + auto dims_attr = ScatterDimensionNumbers::get( + GetI64ElementsAttrForSeq(updates_rank - window_dims, updates_rank, + &rewriter), + GetI64ElementsAttrForSeq(0, num_index_dims, &rewriter), + GetI64ElementsAttrForSeq(0, num_index_dims, &rewriter), + rewriter.getI64IntegerAttr(indices_rank - 1), rewriter.getContext()); + + Location loc = op.getLoc(); + auto scatter = rewriter.create<ScatterOp>( + loc, op.getType(), op.tensor(), op.indices(), op.updates(), dims_attr); + + // Build region to assign the new value. + [&](Region *region) { + OpBuilder::InsertionGuard guard(rewriter); + Block *block = rewriter.createBlock(region); + + // Block arguments are scalars of the given element type. + Type type = + RankedTensorType::get(/*shape=*/{}, tensor_ty.getElementType()); + block->addArguments({type, type}); + rewriter.create<ReturnOp>(loc, block->getArgument(1)); + }(&scatter.update_computation()); + + rewriter.replaceOp(op, scatter.getResult()); + return matchSuccess(); + } +}; + // Converts Tile op to HLO BroadcastInDim and Reshape ops.
// For shape [S1, S2] and multiples [M1, M2], // MS1 = M1 * S1; MS2 = M2 * S2 @@ -1666,7 +1977,7 @@ class ConvertTileOp : public OpRewritePattern<TF::TileOp> { PatternMatchResult matchAndRewrite(TF::TileOp op, PatternRewriter &rewriter) const override { - auto input_ty = op.input()->getType().dyn_cast<RankedTensorType>(); + auto input_ty = op.input().getType().dyn_cast<RankedTensorType>(); if (!input_ty || !input_ty.hasStaticShape()) return matchFailure(); ArrayRef<int64_t> input_shape = input_ty.getShape(); Type element_type = input_ty.getElementType(); @@ -1707,7 +2018,7 @@ class ConvertTileOp : public OpRewritePattern<TF::TileOp> { RankedTensorType::get(broadcasted_shape, element_type); Type output_type = op.getType(); - Value *result = rewriter.create<BroadcastInDimOp>( + Value result = rewriter.create<BroadcastInDimOp>( loc, broadcasted_type, op.input(), GetI64ElementsAttr(broadcast_dimensions, &rewriter)); @@ -1727,19 +2038,24 @@ class ConvertMaxPoolGradOp : public OpRewritePattern<TF::MaxPoolGradOp> { PatternMatchResult matchAndRewrite(TF::MaxPoolGradOp op, PatternRewriter &rewriter) const override { - // TODO(parkers): Support 'SAME' padding mode. - if (op.padding() != "VALID") return matchFailure(); - Location loc = op.getLoc(); Type element_type = - op.orig_input()->getType().cast<TensorType>().getElementType(); + op.orig_input().getType().cast<TensorType>().getElementType(); + + // Compute paddings using the original input and kernel shape and strides. + // Here, the ReduceWindow padding computation is reused, as the MaxPool op + // is lowered to the ReduceWindow op. + auto input_ty = op.orig_input().getType().dyn_cast<RankedTensorType>(); + if (!input_ty) return matchFailure(); + DenseIntElementsAttr paddings_attr = GetReduceWindowPadding( + input_ty.getShape(), op.ksize(), op.strides(), op.padding(), &rewriter); auto result = rewriter.create<SelectAndScatterOp>( loc, op.getType(), op.orig_input(), op.grad(), GetScalarConstOfType(element_type, loc, 0, &rewriter), GetI64ElementsAttr(op.ksize()), GetI64ElementsAttr(op.strides()), - nullptr); + paddings_attr); BuildReduceBody<AddOp>(element_type, &result.scatter(), &rewriter); { @@ -1783,11 +2099,11 @@ class ConvertConv2DBackpropInputOp return Pattern::matchFailure(); auto out_backprop_ty = - op.out_backprop()->getType().dyn_cast<RankedTensorType>(); + op.out_backprop().getType().dyn_cast<RankedTensorType>(); if (!out_backprop_ty || !out_backprop_ty.hasStaticShape()) return matchFailure(); ArrayRef<int64_t> out_backprop_shape = out_backprop_ty.getShape(); - auto filter_ty = op.filter()->getType().dyn_cast<RankedTensorType>(); + auto filter_ty = op.filter().getType().dyn_cast<RankedTensorType>(); if (!filter_ty || !filter_ty.hasStaticShape()) return matchFailure(); ArrayRef<int64_t> filter_shape = filter_ty.getShape(); int num_spatial_dims = 2; @@ -1859,7 +2175,7 @@ class ConvertConv2DBackpropInputOp auto paddings_attr = DenseIntElementsAttr::get(paddings_ty, conv_paddings); auto spatial_dims_attr = GetI64ElementsAttr(spatial_dims, &rewriter); - Value *filter = op.filter(); + Value filter = op.filter(); if (feature_group_count != 1) { /* @@ -1876,7 +2192,7 @@ class ConvertConv2DBackpropInputOp // activation gradients // = gradients (with padding and dilation) mirrored_weights - Value *result = rewriter.create<ConvOp>( + Value result = rewriter.create<ConvOp>( loc, op.getType(), op.out_backprop(), filter, /*window_strides=*/GetI64ElementsAttr(ones, &rewriter), /*padding=*/paddings_attr, GetI64ElementsAttr(lhs_dilation, &rewriter), @@ -1927,11 +2243,11 @@ class ConvertConv2DBackpropFilterOp return Pattern::matchFailure(); auto out_backprop_ty = - op.out_backprop()->getType().dyn_cast<RankedTensorType>(); + op.out_backprop().getType().dyn_cast<RankedTensorType>(); if (!out_backprop_ty || !out_backprop_ty.hasStaticShape()) return matchFailure(); ArrayRef<int64_t> out_backprop_shape =
out_backprop_ty.getShape(); - auto input_ty = op.input()->getType().dyn_cast<RankedTensorType>(); + auto input_ty = op.input().getType().dyn_cast<RankedTensorType>(); if (!input_ty || !input_ty.hasStaticShape()) return matchFailure(); ArrayRef<int64_t> input_shape = input_ty.getShape(); @@ -2077,7 +2393,7 @@ class ConvertConv2DBackpropFilterOp auto feature_dim_attr = rewriter.getI64IntegerAttr(feature_dim); Location loc = op.getLoc(); - Value *result = rewriter.create<ConvOp>( + Value result = rewriter.create<ConvOp>( loc, op.getType(), op.input(), op.out_backprop(), /*window_strides=*/GetI64ElementsAttr(window_strides, &rewriter), /*padding=*/paddings_attr, GetI64ElementsAttr(lhs_dilation, &rewriter), @@ -2116,7 +2432,7 @@ class ConvertOneHotOp : public OpRewritePattern<TF::OneHotOp> { PatternMatchResult matchAndRewrite(TF::OneHotOp op, PatternRewriter &rewriter) const override { - auto indices_ty = op.indices()->getType().dyn_cast<RankedTensorType>(); + auto indices_ty = op.indices().getType().dyn_cast<RankedTensorType>(); if (!indices_ty || !indices_ty.hasStaticShape()) return matchFailure(); ArrayRef<int64_t> indices_shape = indices_ty.getShape(); Type element_type = indices_ty.getElementType(); @@ -2140,21 +2456,21 @@ class ConvertOneHotOp : public OpRewritePattern<TF::OneHotOp> { Location loc = op.getLoc(); auto index_type = RankedTensorType::get(output_dims, element_type); - Value *compare = rewriter.create<CompareOp>( + Value compare = rewriter.create<CompareOp>( loc, op.indices(), rewriter.create<IotaOp>( loc, index_type, IntegerAttr::get(rewriter.getIntegerType(64), axis)), GetI64ElementsAttr(broadcast_dims, &rewriter), StringAttr::get("EQ", rewriter.getContext())); - Value *on_value = rewriter.create<BroadcastOp>( + Value on_value = rewriter.create<BroadcastOp>( loc, op.getType(), op.on_value(), GetI64ElementsAttr(output_dims, &rewriter)); - Value *off_value = rewriter.create<BroadcastOp>( + Value off_value = rewriter.create<BroadcastOp>( loc, op.getType(), op.off_value(), GetI64ElementsAttr(output_dims, &rewriter)); - Value *result = rewriter.create<SelectOp>(loc, op.getType(), compare, - on_value, off_value); + Value result = rewriter.create<SelectOp>(loc, op.getType(), compare, + on_value, off_value); rewriter.replaceOp( op, {result}, @@ -2206,7 +2522,7 @@ class ConvertTopKV2Op : public OpRewritePattern<TF::TopKV2Op> { // The last dimension of the input tensor's shape should be known so we can // have clamped end_indices for slices. - TensorType input_type = op.input()->getType().cast<TensorType>(); + TensorType input_type = op.input().getType().cast<TensorType>(); if (!input_type.hasRank()) return matchFailure(); int64_t input_rank = input_type.getRank(); int64_t last_dim_index = input_rank - 1; @@ -2216,14 +2532,14 @@ class ConvertTopKV2Op : public OpRewritePattern<TF::TopKV2Op> { // Create an Iota op for indices. auto i32_type = rewriter.getIntegerType(32); Type iota_type = RankedTensorType::get(input_type.getShape(), i32_type); - Value *iota_op = rewriter.create<IotaOp>( + Value iota_op = rewriter.create<IotaOp>( op.getLoc(), iota_type, rewriter.getI64IntegerAttr(last_dim_index)); // Create the sort op. It takes two inputs, one for the original input, the // other for the indices. auto sort_op = rewriter.create<SortOp>( - op.getLoc(), llvm::ArrayRef<Value *>{op.input(), iota_op}, - last_dim_index, /*is_stable=*/true); + op.getLoc(), llvm::ArrayRef<Value>{op.input(), iota_op}, last_dim_index, + /*is_stable=*/true); BuildSortComparisonBody({input_type.getElementType(), i32_type}, /*direction=*/"GT", &sort_op.comparator(), &rewriter); @@ -2242,13 +2558,13 @@ class ConvertTopKV2Op : public OpRewritePattern<TF::TopKV2Op> { // Get the slice for the top K elements.
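// (Illustrative: the sorted values and indices are sliced down to the leading k entries along the last dimension, with all leading dimensions kept whole, so both slices below have shape [..., k].)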
-    Value *values = rewriter.create<SliceOp>(
+    Value values = rewriter.create<SliceOp>(
         op.getLoc(), tuple_first_element,
         GetI64ElementsAttr(begin_indices, &rewriter),
         GetI64ElementsAttr(end_indices, &rewriter),
         GetI64ElementsAttr(strides, &rewriter));
-    Value *indices = rewriter.create<SliceOp>(
+    Value indices = rewriter.create<SliceOp>(
         op.getLoc(), tuple_second_element,
         GetI64ElementsAttr(begin_indices, &rewriter),
         GetI64ElementsAttr(end_indices, &rewriter),
@@ -2271,7 +2587,7 @@ class ConvertUnpackOp : public OpRewritePattern<TF::UnpackOp> {
   PatternMatchResult matchAndRewrite(TF::UnpackOp op,
                                      PatternRewriter &rewriter) const override {
-    auto value_type = op.value()->getType().cast<RankedTensorType>();
+    auto value_type = op.value().getType().cast<RankedTensorType>();
     if (!value_type) return matchFailure();

     int64_t value_rank = value_type.getRank();
@@ -2284,7 +2600,7 @@ class ConvertUnpackOp : public OpRewritePattern<TF::UnpackOp> {
     SmallVector<int64_t, 4> strides(value_rank, 1);

     // All HLO slice+reshape results used to replace the original tf.Unpack op.
-    SmallVector<Value *, 4> results;
+    SmallVector<Value, 4> results;
     results.reserve(op.getNumResults());

     for (int i = 0; i < op.getNumResults(); ++i) {
@@ -2329,12 +2645,12 @@ class GenericConvertUnsortedSegmentReductionOp : public OpRewritePattern<OpTy> {
   PatternMatchResult matchAndRewrite(OpTy op,
                                      PatternRewriter &rewriter) const override {
-    auto data_type = op.data()->getType().template dyn_cast<RankedTensorType>();
+    auto data_type = op.data().getType().template dyn_cast<RankedTensorType>();
     if (!data_type) return this->matchFailure();
     int64_t data_rank = data_type.getRank();

     auto segment_ids_type =
-        op.segment_ids()->getType().template dyn_cast<RankedTensorType>();
+        op.segment_ids().getType().template dyn_cast<RankedTensorType>();
     if (!segment_ids_type) return this->matchFailure();
     int64_t segment_ids_rank = segment_ids_type.getRank();
@@ -2353,22 +2669,20 @@ class GenericConvertUnsortedSegmentReductionOp : public OpRewritePattern<OpTy> {
     // Broadcast the initial value for reduction. This will become the
     // 'operand' parameter that the final scatter op scatters into.
-    Value *init = ConcreteClass::GetInitialValue(data_type.getElementType(),
-                                                 op.getLoc(), rewriter);
+    Value init = ConcreteClass::GetInitialValue(data_type.getElementType(),
+                                                op.getLoc(), rewriter);
     auto broadcasted_init = rewriter.create<BroadcastOp>(
         op.getLoc(), output_type, init,
         GetI64ElementsAttr(output_shape, &rewriter));

     // Parameters for the generated scatter op.
-    auto range = llvm::seq<int64_t>(segment_ids_rank, data_rank);
-    SmallVector<int64_t, 4> update_window_dims(range.begin(), range.end());
     SmallVector<int64_t, 4> inserted_window_dims(1, 0);
     SmallVector<int64_t, 4> scatter_dims_to_operand_dims(1, 0);
     int64_t index_vector_dim = segment_ids_rank;

     // Put all parameters in a StructAttr.
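GenericConvertUnsortedSegmentReductionOp broadcasts the reduction's initial value to the full output shape, then scatters each data row into the slot named by its segment id, combining with the reduction's operator; the scatter dimension numbers are assembled in the StructAttr code that follows. A NumPy sketch of the 1-D-ids case (the real op also handles higher-rank ids via the dimension numbers):

```
import numpy as np

def unsorted_segment_reduce(data, segment_ids, num_segments, combine, init):
    # Broadcast the initial value to the output shape; this is the "operand"
    # the scatter then folds updates into.
    output = np.full((num_segments,) + data.shape[1:], init, dtype=data.dtype)
    for i, seg in enumerate(segment_ids):
        if 0 <= seg < num_segments:  # out-of-range ids are dropped
            output[seg] = combine(output[seg], data[i])
    return output

data = np.array([1, 2, 3, 4])
ids = np.array([0, 1, 0, 1])
print(unsorted_segment_reduce(data, ids, 2, np.maximum,
                              np.iinfo(np.int64).min))  # [3 4]
```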
auto dims_attr = ScatterDimensionNumbers::get( - GetI64ElementsAttr(update_window_dims, &rewriter), + GetI64ElementsAttrForSeq(segment_ids_rank, data_rank, &rewriter), GetI64ElementsAttr(inserted_window_dims, &rewriter), GetI64ElementsAttr(scatter_dims_to_operand_dims, &rewriter), rewriter.getI64IntegerAttr(index_vector_dim), rewriter.getContext()); @@ -2391,8 +2705,8 @@ class ConvertUnsortedSegmentMaxOp using GenericConvertUnsortedSegmentReductionOp:: GenericConvertUnsortedSegmentReductionOp; - static Value *GetInitialValue(Type reduce_element_type, Location loc, - PatternRewriter &rewriter) { + static Value GetInitialValue(Type reduce_element_type, Location loc, + PatternRewriter &rewriter) { return GetMinValueForType(reduce_element_type, loc, &rewriter); } }; @@ -2404,8 +2718,8 @@ class ConvertUnsortedSegmentMinOp using GenericConvertUnsortedSegmentReductionOp:: GenericConvertUnsortedSegmentReductionOp; - static Value *GetInitialValue(Type reduce_element_type, Location loc, - PatternRewriter &rewriter) { + static Value GetInitialValue(Type reduce_element_type, Location loc, + PatternRewriter &rewriter) { return GetMaxValueForType(reduce_element_type, loc, &rewriter); } }; @@ -2417,8 +2731,8 @@ class ConvertUnsortedSegmentProdOp using GenericConvertUnsortedSegmentReductionOp:: GenericConvertUnsortedSegmentReductionOp; - static Value *GetInitialValue(Type reduce_element_type, Location loc, - PatternRewriter &rewriter) { + static Value GetInitialValue(Type reduce_element_type, Location loc, + PatternRewriter &rewriter) { return GetScalarConstOfType(reduce_element_type, loc, 1, &rewriter); } }; @@ -2430,8 +2744,8 @@ class ConvertUnsortedSegmentSumOp using GenericConvertUnsortedSegmentReductionOp:: GenericConvertUnsortedSegmentReductionOp; - static Value *GetInitialValue(Type reduce_element_type, Location loc, - PatternRewriter &rewriter) { + static Value GetInitialValue(Type reduce_element_type, Location loc, + PatternRewriter &rewriter) { return GetScalarConstOfType(reduce_element_type, loc, 0, &rewriter); } }; @@ -2450,16 +2764,18 @@ LogicalResult legalizeTF(Operation *op, bool allow_partial_conversion) { // here for lowering to HLO. 
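Each GetInitialValue specialization above supplies the identity element of its combiner, so output slots that no segment id touches keep a neutral value. Before the pattern list below, a quick check of those identities in NumPy (float32 chosen arbitrarily; the exact extremal constant the helpers emit, lowest finite value versus infinity, may differ, and either is an identity for max/min):

```
import numpy as np

# combine(identity, x) must return x for every x of the element type.
identities = {
    "max":  (np.maximum,  np.finfo(np.float32).min),
    "min":  (np.minimum,  np.finfo(np.float32).max),
    "prod": (np.multiply, np.float32(1)),
    "sum":  (np.add,      np.float32(0)),
}
x = np.float32(7.5)
for name, (combine, init) in identities.items():
    assert combine(init, x) == x, name
```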
TF::PopulateLoweringTFPatterns(context, &patterns); patterns.insert< - ConvertArgMaxOp, ConvertBF16FloorDivOp, ConvertConv2D, ConvertEinsumOp, - ConvertMaxPoolOp, ConvertRangeOp, ConvertSigmoidOp, ConvertSizeOp, - ConvertMaxPoolOp, ConvertRangeOp, ConvertSigmoidOp, + ConvertAllOp, ConvertAnyOp, ConvertArgMaxOp, ConvertBF16FloorDivOp, + ConvertConv2D, ConvertConv2DBackpropFilterOp, + ConvertConv2DBackpropInputOp, ConvertEinsumOp, + ConvertFusedBatchNormGradOp, ConvertFusedBatchNormGradV2Op, + ConvertFusedBatchNormGradV3Op, ConvertFusedBatchNormV3Op, ConvertMaxOp, + ConvertMaxPoolOp, ConvertMaxPoolGradOp, ConvertMeanOp, ConvertOneHotOp, + ConvertRangeOp, ConvertSigmoidOp, ConvertSizeOp, ConvertSoftmaxOp, ConvertSoftmaxOp, ConvertSplitOp, ConvertSplitVOp, - ConvertStridedSliceOp, ConvertStridedSliceGradOp, ConvertTopKV2Op, - ConvertUnpackOp, ConvertMeanOp, ConvertSumOp, ConvertMaxOp, ConvertAllOp, - ConvertAnyOp, ConvertTileOp, ConvertMaxPoolGradOp, ConvertOneHotOp, - ConvertConv2DBackpropInputOp, ConvertConv2DBackpropFilterOp, - ConvertUnsortedSegmentMaxOp, ConvertUnsortedSegmentMinOp, + ConvertStridedSliceOp, ConvertStridedSliceGradOp, ConvertSumOp, + ConvertTensorScatterUpdateOp, ConvertTileOp, ConvertTopKV2Op, + ConvertUnpackOp, ConvertUnsortedSegmentMaxOp, ConvertUnsortedSegmentMinOp, ConvertUnsortedSegmentProdOp, ConvertUnsortedSegmentSumOp>( op->getContext()); @@ -2482,7 +2798,7 @@ void LegalizeTF::runOnFunction() { signalPassFailure(); } -static PassRegistration pass( +static PassRegistration pass( "xla-legalize-tf", "Legalize from TensorFlow to the XLA dialect"); } // end namespace diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_control_flow.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_control_flow.cc index ac14bca6b2b..ee7cd7ea6db 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_control_flow.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_control_flow.cc @@ -28,19 +28,19 @@ limitations under the License. 
#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/iterator_range.h" -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/BlockAndValueMapping.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/TypeUtilities.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassRegistry.h" // TF:local_config_mlir -#include "mlir/Transforms/DialectConversion.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/BlockAndValueMapping.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/TypeUtilities.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassRegistry.h" // TF:llvm-project +#include "mlir/Transforms/DialectConversion.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "tensorflow/compiler/mlir/xla/ir/hlo_ops.h" #include "tensorflow/compiler/mlir/xla/transforms/passes.h" @@ -64,13 +64,12 @@ createLegalizeTFControlFlowPass() { namespace { -void Detuple(Value* tuple, Operation::result_range replace, - OpBuilder* builder) { +void Detuple(Value tuple, Operation::result_range replace, OpBuilder* builder) { // De-tuple the results of the xla hlo conditional result. for (auto result_it : llvm::enumerate(replace)) { auto get_tuple_value = builder->create( - result_it.value()->getLoc(), tuple, result_it.index()); - result_it.value()->replaceAllUsesWith(get_tuple_value); + result_it.value().getLoc(), tuple, result_it.index()); + result_it.value().replaceAllUsesWith(get_tuple_value); } } @@ -87,7 +86,7 @@ void ImportXlaRegion(mlir::FuncOp func, Region* dest_region, Location loc, auto entry_block = builder.createBlock(dest_region); auto tuple_arg = entry_block->addArgument( builder.getTupleType(func.getType().getInputs())); - llvm::SmallVector detupled_args; + llvm::SmallVector detupled_args; detupled_args.reserve(func.getNumArguments()); for (int64_t i = 0, s = func.getNumArguments(); i < s; i++) { @@ -110,12 +109,12 @@ void LowerIf(TF::IfOp op, ModuleOp module) { // XLA prefers tuple arguments for control flow due to XLA not supporting // multiple return values. - SmallVector inputs(op.input()); + SmallVector inputs(op.input()); builder.setInsertionPoint(op); auto tuple_input = builder.create(loc, inputs); // Create the new conditional op with tuple inputs. - SmallVector operands(op.getOperands()); + SmallVector operands(op.getOperands()); SmallVector types(op.getResultTypes()); auto result_type = builder.getTupleType(types); auto conditional = builder.create( @@ -142,12 +141,12 @@ void LowerWhile(TF::WhileOp op, ModuleOp module) { // XLA prefers tuple arguments for control flow due to XLA not supporting // multiple return values. 
- SmallVector inputs(op.input()); + SmallVector inputs(op.input()); builder.setInsertionPoint(op); - Value* tuple_input = builder.create(loc, inputs); + Value tuple_input = builder.create(loc, inputs); // Create the new while op with tuple inputs. - SmallVector operands(op.getOperands()); + SmallVector operands(op.getOperands()); SmallVector types(op.getResultTypes()); auto while_op = builder.create( loc, builder.getTupleType(types), tuple_input); diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td index 34c55e7218b..eeccf788dac 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td @@ -20,6 +20,11 @@ include "mlir/Dialect/StandardOps/Ops.td" include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td" include "tensorflow/compiler/mlir/xla/ir/hlo_ops.td" +def SignedIntTensor : TensorOf<[I1, I8, I16, I32, I64]>; + +// IEEE compliant floating point tensors. +def IEEEFloatTensor : TensorOf<[F16, F32, F64]>; + //===----------------------------------------------------------------------===// // BatchNorm op patterns. //===----------------------------------------------------------------------===// @@ -30,19 +35,19 @@ def FalseBoolAttr : AttrConstraint>; def TrueBoolAttr : AttrConstraint>; def CastValueToI64: NativeCodeCall< - "CastValueToI64($0->getLoc(), $1, &$_builder)">; + "CastValueToI64($0.getLoc(), $1, &$_builder)">; // Here, $0 is an ElementsAttr with exactly one element of type integer. $1 is // the corresponding value of ranked tensor type whose axis is referred in $0. def GetHLOAxisFromTFAxis : NativeCodeCall< "GetHLOAxisFromTFAxis(" - "$0, $1->getType().cast().getRank(), &$_builder)">; + "$0, $1.getType().cast().getRank(), &$_builder)">; // Same as the above but with $1 of type operand_range from variadic TensorFlow // input. def GetHLOAxisFromTFAxisVariadic : NativeCodeCall< "GetHLOAxisFromTFAxis(" - "$0, (*$1.begin())->getType().cast().getRank(), " + "$0, (*$1.begin()).getType().cast().getRank(), " "&$_builder)">; def : Pattern< @@ -87,12 +92,13 @@ def AreBroadcastCompatible : Constraint, "types must be broadcastable">; class DirectBinaryPat - : Pat<(FromOp $l, $r), + : Pat<(FromOp AnyRankedTensor:$l, AnyRankedTensor:$r), (ToOp $l, $r, (BinBroadcastDimensions $l, $r))>; foreach fromToBinPair = [[TF_AddOp, HLO_AddOp], [TF_AddV2Op, HLO_AddOp], [TF_DivOp, HLO_DivOp], + [TF_LeftShiftOp, HLO_ShiftLeftOp], [TF_MaximumOp, HLO_MaxOp], [TF_MinimumOp, HLO_MinOp], [TF_MulOp, HLO_MulOp], @@ -101,18 +107,22 @@ foreach fromToBinPair = [[TF_AddOp, HLO_AddOp], [TF_SubOp, HLO_SubOp]] in def : DirectBinaryPat; +def LowerRightShiftSigned : + Pat<(TF_RightShiftOp AnyRankedTensor:$l, AnyRankedTensor:$r), + (HLO_ShiftRightArithmeticOp $l, $r, (BinBroadcastDimensions $l, $r)), + [(SignedIntTensor $r)]>; + +// TODO(hinsu): Lower unsigned types to HLO_ShiftRightLogical once the HLO op +// supports unsigned integers. + def : Pat<(TF_ComplexOp $r, $i), (HLO_ComplexOp $r, $i)>; -def IntegerTensor : TensorOf<[I1, I8, I16, I32, I64]>; - -// IEEE compliant floating point tensors. 
-def IEEEFloatTensor : TensorOf<[F16, F32, F64]>;
-
 // Performs a substitution of FloorDiv, pseudo code below:
 //
 //  return floor(div(x, y))
-def : Pat<(TF_FloorDivOp IEEEFloatTensor:$l, IEEEFloatTensor:$r),
-          (HLO_FloorOp (HLO_DivOp $l, $r, (BinBroadcastDimensions $l, $r)))>;
+def : Pat<(TF_FloorDivOp AnyRankedTensor:$l, AnyRankedTensor:$r),
+          (HLO_FloorOp (HLO_DivOp $l, $r, (BinBroadcastDimensions $l, $r))),
+          [(IEEEFloatTensor $l)]>;

 // Performs a substitution of FloorDiv for integer tensors, which requires
 // additional correction for a negative numerator / denominator. Equivalent
@@ -131,7 +141,9 @@ def : Pat<(TF_FloorDivOp IEEEFloatTensor:$l, IEEEFloatTensor:$r),
 // without returning the broadcast of 'r' to broadcast('l', 'r').
 //
 // NOTE: This should be optimized for unsigned integers.
-def : Pat<(TF_FloorDivOp IntegerTensor:$l, IntegerTensor:$r),
+// Requires static shaped inputs to create constant splats and computation of
+// broadcast attributes.
+def : Pat<(TF_FloorDivOp AnyStaticShapeTensor:$l, AnyStaticShapeTensor:$r),
           (HLO_SelectOp
            (HLO_CompareOp
             (HLO_CompareOp $l, (HLO_ConstOp (ConstantSplat<"0"> $l)),
@@ -146,14 +158,17 @@ def : Pat<(TF_FloorDivOp IntegerTensor:$l, IntegerTensor:$r),
              (HLO_ConstOp (ConstantSplat<"1"> $r)),
              (NullDenseIntElementsAttr)),
             (BinBroadcastDimensions $l, $r))),
-           (HLO_AbsOp:$abs $r), (BinBroadcastDimensions $neg, $abs)))>;
+           (HLO_AbsOp:$abs $r), (BinBroadcastDimensions $neg, $abs))),
+          [(SignedIntTensor $l)]>;

 // Performs a substitution of FloorMod designed to correct for possibly negative
 // values. Pseudocode shown below:
 //
 //   T trunc_mod = std::fmod(x, y);
 //   return trunc_mod != 0 && (y < 0 != trunc_mod < 0) ? trunc_mod + y
 //                                                     : trunc_mod;
-def : Pat<(TF_FloorModOp $l, $r),
+// Requires static shaped inputs to create constant splats and computation of
+// broadcast attributes.
+def : Pat<(TF_FloorModOp AnyStaticShapeTensor:$l, AnyStaticShapeTensor:$r),
           (HLO_SelectOp
            (HLO_AndOp
             (HLO_CompareOp
@@ -186,8 +201,9 @@ def : Pat<(TF_BroadcastToOp:$result AnyRankedTensor:$input, $shape),
 //===----------------------------------------------------------------------===//

 class DirectLogicalBinaryPat<Op FromOp, Op ToOp>
-  : Pat<(FromOp IntegerTensor:$l, IntegerTensor:$r),
-        (ToOp $l, $r, (BinBroadcastDimensions $l, $r))>;
+  : Pat<(FromOp AnyRankedTensor:$l, AnyRankedTensor:$r),
+        (ToOp $l, $r, (BinBroadcastDimensions $l, $r)),
+        [(SignedIntTensor $l)]>;

 foreach fromToBinPair = [[TF_LogicalAndOp, HLO_AndOp],
                          [TF_LogicalOrOp, HLO_OrOp],
@@ -199,7 +215,7 @@ foreach fromToBinPair = [[TF_LogicalAndOp, HLO_AndOp],
 //===----------------------------------------------------------------------===//

 class DirectComparePat<Op FromOp, StrEnumAttrCase direction>
-  : Pat<(FromOp $l, $r),
+  : Pat<(FromOp AnyRankedTensor:$l, AnyRankedTensor:$r),
        (HLO_CompareOp $l, $r, (BinBroadcastDimensions $l, $r), direction)>;

 def : DirectComparePat;
@@ -208,7 +224,7 @@ def : DirectComparePat;
 def : DirectComparePat;

 class EqualityPat<Op FromOp, StrEnumAttrCase direction>
-  : Pat<(FromOp $l, $r,
+  : Pat<(FromOp AnyRankedTensor:$l, AnyRankedTensor:$r,
          TrueBoolAttr:$incompatible_shape_error),
        (HLO_CompareOp $l, $r, (BinBroadcastDimensions $l, $r), direction),
        [(AreBroadcastCompatible $l, $r)]>;
@@ -235,10 +251,10 @@ def OneElementAttr
     "Scalar ElementsAttr">;

 def HasRankedFirstOperand
-  : Constraint<CPred<"(*$0.begin())->getType().isa<RankedTensorType>()">>;
+  : Constraint<CPred<"(*$0.begin()).getType().isa<RankedTensorType>()">>;

 def IsShapedTensor
-  : Constraint<CPred<"$0->getType().isa<ShapedType>()">>;
+  : Constraint<CPred<"$0.getType().isa<ShapedType>()">>;

 // This pattern converts TensorFlow axis format to HLO axis format which
 // doesn't wrap around like TensorFlow and is always positive.
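The integer FloorDiv/FloorMod patterns above implement the sign corrections sketched in their comments. The helper below restates them in plain Python on top of truncating division and checks against Python's floor semantics; the abs-based formula is a paraphrase of the select/abs/neg DAG, not a literal transcription of it:

```
def trunc_div(x, y):
    # HLO integer division truncates toward zero.
    return abs(x) // abs(y) * (1 if (x < 0) == (y < 0) else -1)

def floor_div(x, y):
    if (x < 0) == (y < 0):
        return trunc_div(x, y)
    # Signs differ: floor(x/y) == -((|x| + |y| - 1) / |y|) in trunc arithmetic.
    return -((abs(x) + abs(y) - 1) // abs(y))

def floor_mod(x, y):
    # trunc_mod, corrected when nonzero and its sign differs from y's sign.
    t = x - trunc_div(x, y) * y
    return t + y if t != 0 and (t < 0) != (y < 0) else t

for x in range(-7, 8):
    for y in (-3, -2, 2, 3):
        assert floor_div(x, y) == x // y
        assert floor_mod(x, y) == x % y
```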
For this @@ -389,7 +405,7 @@ def : Pat<(TF_SliceOp:$op HLO_Tensor:$input, HLO_Tensor:$starting_indices, // Ternary op patterns. //===----------------------------------------------------------------------===// -def BothTypesMatch : ConstraintgetType() == $1->getType()">, +def BothTypesMatch : Constraint, "types must be equal">; foreach src = [TF_SelectOp, TF_SelectV2Op] in @@ -412,6 +428,8 @@ foreach Mapping = [ [TF_ImagOp, HLO_ImagOp], [TF_IsFiniteOp, HLO_IsFiniteOp], [TF_LogOp, HLO_LogOp], + [TF_Log1pOp, HLO_Log1pOp], + [TF_LogicalNotOp, HLO_NotOp], [TF_NegOp, HLO_NegOp], [TF_RealOp, HLO_RealOp], [TF_RsqrtOp, HLO_RsqrtOp], @@ -440,6 +458,19 @@ foreach TfOp = [TF_ExpandDimsOp, TF_ReshapeOp, TF_SqueezeOp, ] in { (HLO_ReshapeOp $arg), [(AnyStaticShapeTensor $res)]>; } +// Returns 0 if x is NaN, 0 if x is 0, -1 if x < 0 and 1 if x > 0. +def : Pat<(TF_SignOp $x), + (HLO_SelectOp + (HLO_CompareOp + $x, + $x, + (NullDenseIntElementsAttr), + HLO_COMPARISON_DIRECTION_NE + ), + (HLO_ConstOp (ConstantSplat<"0"> $x)), + (HLO_SignOp $x) + )>; + //===----------------------------------------------------------------------===// // RngUniform. //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_to_standard.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_to_standard.cc index 29f3eb9a8f5..5e12abc466c 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_to_standard.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_to_standard.cc @@ -16,10 +16,10 @@ limitations under the License. // This file implements logic for lowering XLA dialect to Standard dialect. #include "llvm/ADT/StringSwitch.h" -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/PatternMatch.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project #include "tensorflow/compiler/mlir/xla/ir/hlo_ops.h" #include "tensorflow/compiler/mlir/xla/transforms/passes.h" #include "tensorflow/compiler/mlir/xla/transforms/rewriters.h" @@ -47,8 +47,8 @@ struct CompareIConvert : public RewritePattern { auto lhs = compare_op.lhs(); auto rhs = compare_op.rhs(); - auto lhs_type = lhs->getType().cast(); - auto rhs_type = rhs->getType().cast(); + auto lhs_type = lhs.getType().cast(); + auto rhs_type = rhs.getType().cast(); // Broadcasting not supported by this rewrite. if (lhs_type.getShape() != rhs_type.getShape()) return matchFailure(); @@ -86,8 +86,8 @@ struct CompareFConvert : public RewritePattern { auto lhs = compare_op.lhs(); auto rhs = compare_op.rhs(); - auto lhs_type = lhs->getType().cast(); - auto rhs_type = rhs->getType().cast(); + auto lhs_type = lhs.getType().cast(); + auto rhs_type = rhs.getType().cast(); // Broadcasting not supported by this rewrite. 
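The TF_SignOp pattern a few hunks up relies on compare(x, x, NE), which is true only for NaN, to patch HLO's sign op: NaN maps to 0 instead of propagating. The same trick in NumPy terms:

```
import numpy as np

def tf_sign(x):
    # not_equal(x, x) holds only for NaN, so the select returns 0 there
    # and sign(x) everywhere else.
    return np.where(np.not_equal(x, x), np.zeros_like(x), np.sign(x))

print(tf_sign(np.array([np.nan, -2.0, 0.0, 3.5])))  # [ 0. -1.  0.  1.]
```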
if (lhs_type.getShape() != rhs_type.getShape()) return matchFailure(); diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_to_standard_patterns.td b/tensorflow/compiler/mlir/xla/transforms/legalize_to_standard_patterns.td index 43c57b9bf7f..a15b28193cd 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_to_standard_patterns.td +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_to_standard_patterns.td @@ -31,8 +31,8 @@ def : Pat<(HLO_ConstOp ElementsAttr:$value), //===----------------------------------------------------------------------===// def IsSameSizePred : CPred< - "$0->getType().cast().getShape() " - "== $1->getType().cast().getShape()">; + "$0.getType().cast().getShape() " + "== $1.getType().cast().getShape()">; def IsSameSizeConstraint : Constraint; @@ -74,9 +74,9 @@ def : Pat<(HLO_MulOp HLO_IntTensor:$l, HLO_IntTensor:$r, [(IsSameSizeConstraint $l, $r)]>; def : Pat<(HLO_DivOp HLO_IntTensor:$l, HLO_IntTensor:$r, IsNullAttr:$broadcast_dimensions), - (DivISOp $l, $r), + (SignedDivIOp $l, $r), [(IsSameSizeConstraint $l, $r)]>; def : Pat<(HLO_RemOp HLO_IntTensor:$l, HLO_IntTensor:$r, IsNullAttr:$broadcast_dimensions), - (RemISOp $l, $r), + (SignedRemIOp $l, $r), [(IsSameSizeConstraint $l, $r)]>; diff --git a/tensorflow/compiler/mlir/xla/transforms/lhlo_fuse_linalg.cc b/tensorflow/compiler/mlir/xla/transforms/lhlo_fuse_linalg.cc index a8a2eb77586..8ad6717a3f1 100644 --- a/tensorflow/compiler/mlir/xla/transforms/lhlo_fuse_linalg.cc +++ b/tensorflow/compiler/mlir/xla/transforms/lhlo_fuse_linalg.cc @@ -18,8 +18,8 @@ limitations under the License. #include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h" #include "absl/memory/memory.h" -#include "mlir/Dialect/Linalg/Utils/Utils.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir +#include "mlir/Dialect/Linalg/Utils/Utils.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project namespace mlir { namespace xla_lhlo { @@ -42,7 +42,7 @@ struct LhloFuseLinalg : public FunctionPass { // tiled. In order to greedily fuse the ops, we have to start from the tiled // root linalg ops, i.e. linalg ops that write to output buffers of the // function. - llvm::SmallDenseSet func_args; + llvm::SmallDenseSet func_args; for (auto func_arg : func.getArguments()) { func_args.insert(func_arg); } @@ -52,7 +52,7 @@ struct LhloFuseLinalg : public FunctionPass { const SmallVector tile_sizes( generic_op.getNumInputsAndOutputs(), 1); auto op = cast(generic_op.getOperation()); - for (const Value* result : op.getOutputs()) { + for (const Value result : op.getOutputs()) { if (!func_args.count(result)) continue; if (linalg::tileLinalgOp(b, op, tile_sizes, /*permutation=*/{}, &folder)) { diff --git a/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_affine.cc b/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_affine.cc index f3b8ab9c311..647c304c686 100644 --- a/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_affine.cc +++ b/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_affine.cc @@ -16,14 +16,14 @@ limitations under the License. // This file implements logic for lowering LHLO dialect to Affine dialect. 
#include "absl/memory/memory.h" -#include "mlir/Dialect/AffineOps/AffineOps.h" // TF:local_config_mlir -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir +#include "mlir/Dialect/AffineOps/AffineOps.h" // TF:llvm-project +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/PatternMatch.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project #include "tensorflow/compiler/mlir/xla/ir/lhlo_ops.h" #include "tensorflow/compiler/mlir/xla/transforms/map_lhlo_to_scalar_op.h" @@ -39,15 +39,15 @@ struct BinaryOpConverter : public OpRewritePattern { PatternRewriter& rewriter) const override { const auto& lhs = op.lhs(); const auto& rhs = op.rhs(); - const auto& lhs_type = lhs->getType().template cast(); - const auto& rhs_type = rhs->getType().template cast(); + const auto& lhs_type = lhs.getType().template cast(); + const auto& rhs_type = rhs.getType().template cast(); const auto& element_type = lhs_type.getElementType(); if (lhs_type.getShape() != rhs_type.getShape()) { return this->matchFailure(); } const auto& shape = lhs_type.getShape(); - SmallVector induction_vars; + SmallVector induction_vars; const auto loc = op.getLoc(); for (int i = 0; i < shape.size(); ++i) { auto forOp = rewriter.create(loc, 0, shape[i]); diff --git a/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_gpu.cc b/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_gpu.cc index 9f1f90cb2f0..28413041ac4 100644 --- a/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_gpu.cc +++ b/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_gpu.cc @@ -19,21 +19,21 @@ limitations under the License. 
#include "absl/memory/memory.h" #include "llvm/ADT/ArrayRef.h" -#include "mlir/Dialect/GPU/GPUDialect.h" // TF:local_config_mlir -#include "mlir/Dialect/Linalg/IR/LinalgOps.h" // TF:local_config_mlir -#include "mlir/Dialect/LoopOps/LoopOps.h" // TF:local_config_mlir -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/BlockAndValueMapping.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Transforms/DialectConversion.h" // TF:local_config_mlir +#include "mlir/Dialect/GPU/GPUDialect.h" // TF:llvm-project +#include "mlir/Dialect/Linalg/IR/LinalgOps.h" // TF:llvm-project +#include "mlir/Dialect/LoopOps/LoopOps.h" // TF:llvm-project +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/BlockAndValueMapping.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/PatternMatch.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Transforms/DialectConversion.h" // TF:llvm-project #include "tensorflow/compiler/mlir/xla/ir/lhlo_ops.h" #include "tensorflow/compiler/mlir/xla/transforms/map_lhlo_to_scalar_op.h" @@ -49,13 +49,13 @@ class LhloReduceToGPULaunchConverter : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; PatternMatchResult matchAndRewrite( - ReduceOp reduce_op, ArrayRef args, + ReduceOp reduce_op, ArrayRef args, ConversionPatternRewriter& rewriter) const final { auto loc = reduce_op.getLoc(); // Only support 1d reductions for now. int64_t size = 0; for (auto result : reduce_op.out()) { - auto shaped_type = result->getType().dyn_cast(); + auto shaped_type = result.getType().dyn_cast(); if (!shaped_type || shaped_type.getRank() != 1) { return matchFailure(); } @@ -71,7 +71,7 @@ class LhloReduceToGPULaunchConverter : public OpConversionPattern { // Require all inputs to have the same shape. int64_t reduce_dim_size = 0; for (auto input : reduce_op.operands()) { - auto shaped_type = input->getType().dyn_cast(); + auto shaped_type = input.getType().dyn_cast(); if (!shaped_type || !shaped_type.hasStaticShape()) { return matchFailure(); } @@ -105,7 +105,7 @@ class LhloReduceToGPULaunchConverter : public OpConversionPattern { loc, mapping.lookup(std::get<0>(pair))); rewriter.create(loc, init_value, mapping.lookup(std::get<1>(pair)), - ArrayRef{index}); + ArrayRef{index}); } // Insert a loop into the body to compute the reduction. The loop ranges @@ -128,15 +128,15 @@ class LhloReduceToGPULaunchConverter : public OpConversionPattern { auto output = mapping.lookup(*reduce_op.out().begin()); // TODO(herhut) Move this to the SliceOp builder. 
auto resType = MemRefType::get( - llvm::None, output->getType().cast().getElementType(), + llvm::None, output.getType().cast().getElementType(), makeStridedLinearLayoutMap(llvm::None, MemRefType::getDynamicStrideOrOffset(), rewriter.getContext())); auto accumulator = rewriter.create( - loc, resType, output, ArrayRef{launch_op.getThreadIds().x}); - llvm::SmallVector indexings; + loc, resType, output, ArrayRef{launch_op.getThreadIds().x}); + llvm::SmallVector indexings; auto input_buffer = *reduce_op.operands().begin(); - auto input_type = input_buffer->getType().cast(); + auto input_type = input_buffer.getType().cast(); for (int64_t dim = 0; dim < input_type.getRank(); ++dim) { indexings.push_back(dim == reducing_dimension ? loop.getInductionVar() diff --git a/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_linalg.cc b/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_linalg.cc index af7383c5101..739b9f3554d 100644 --- a/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_linalg.cc +++ b/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_linalg.cc @@ -17,20 +17,20 @@ limitations under the License. #include "absl/memory/memory.h" #include "llvm/ADT/APInt.h" -#include "mlir/Dialect/Linalg/IR/LinalgOps.h" // TF:local_config_mlir -#include "mlir/Dialect/Linalg/IR/LinalgTypes.h" // TF:local_config_mlir -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/AffineExpr.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Transforms/DialectConversion.h" // TF:local_config_mlir +#include "mlir/Dialect/Linalg/IR/LinalgOps.h" // TF:llvm-project +#include "mlir/Dialect/Linalg/IR/LinalgTypes.h" // TF:llvm-project +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/AffineExpr.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/PatternMatch.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Transforms/DialectConversion.h" // TF:llvm-project #include "tensorflow/compiler/mlir/xla/ir/lhlo_ops.h" #include "tensorflow/compiler/mlir/xla/transforms/map_lhlo_to_scalar_op.h" @@ -53,11 +53,11 @@ class PointwiseToLinalgConverter : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; PatternMatchResult matchAndRewrite( - LhloOp lhlo_op, ArrayRef args, + LhloOp lhlo_op, ArrayRef args, ConversionPatternRewriter& rewriter) const final { auto loc = lhlo_op.getLoc(); auto argType = - lhlo_op.getOperand(0)->getType().template dyn_cast(); + lhlo_op.getOperand(0).getType().template dyn_cast(); if (!argType || !argType.hasStaticShape()) { emitError(loc, "lhlo to linalg conversion expects statically shaped args"); @@ -73,7 +73,7 @@ class 
PointwiseToLinalgConverter : public OpConversionPattern { unsigned nloops = 0; int operandCount = args.size() - 1; for (const auto& arg : llvm::enumerate(args)) { - auto memrefType = arg.value()->getType().dyn_cast(); + auto memrefType = arg.value().getType().dyn_cast(); if (!memrefType) return ConversionPattern::matchFailure(); unsigned rank = memrefType.getRank(); if (!rank || (nloops && nloops != rank)) { @@ -101,7 +101,7 @@ class PointwiseToLinalgConverter : public OpConversionPattern { block->addArguments(bodyArgTypes); block->addArguments(bodyResultTypes); - SmallVector bodyArgs; + SmallVector bodyArgs; for (int i = 0, e = bodyArgTypes.size(); i < e; ++i) { bodyArgs.push_back(block->getArgument(i)); } @@ -121,11 +121,11 @@ class ScalarPointwiseToStandardConverter : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; PatternMatchResult matchAndRewrite( - LhloOp lhlo_op, ArrayRef args, + LhloOp lhlo_op, ArrayRef args, ConversionPatternRewriter& rewriter) const final { auto loc = lhlo_op.getLoc(); auto argType = - lhlo_op.getOperand(0)->getType().template dyn_cast(); + lhlo_op.getOperand(0).getType().template dyn_cast(); if (!argType || !argType.getElementType().isIntOrFloat() || (argType.getRank() != 0)) { return ConversionPattern::matchFailure(); @@ -136,7 +136,7 @@ class ScalarPointwiseToStandardConverter : public OpConversionPattern { auto rhs = rewriter.create(loc, lhlo_op.rhs()); Operation* op = MapLhloOpToStdScalarOp( llvm::cast(lhlo_op), argType.getElementType(), - llvm::ArrayRef{lhs, rhs}, rewriter); + llvm::ArrayRef{lhs, rhs}, rewriter); rewriter.create(loc, op->getResult(0), lhlo_op.out()); rewriter.eraseOp(lhlo_op); return ConversionPattern::matchSuccess(); @@ -148,12 +148,12 @@ class BroadcastInDimConverter : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; PatternMatchResult matchAndRewrite( - BroadcastInDimOp broadcastOp, ArrayRef args, + BroadcastInDimOp broadcastOp, ArrayRef args, ConversionPatternRewriter& rewriter) const final { auto operandMemrefType = - broadcastOp.operand()->getType().dyn_cast(); + broadcastOp.operand().getType().dyn_cast(); auto resultMemrefType = - broadcastOp.output()->getType().dyn_cast(); + broadcastOp.output().getType().dyn_cast(); if (!operandMemrefType || !resultMemrefType) return matchFailure(); auto broadcastDims = broadcastOp.broadcast_dimensions(); if (!broadcastDims.hasValue()) return matchFailure(); @@ -167,7 +167,7 @@ class BroadcastInDimConverter : public OpConversionPattern { private: PatternMatchResult emitScalarBroadcast( - BroadcastInDimOp broadcastOp, ArrayRef args, + BroadcastInDimOp broadcastOp, ArrayRef args, MemRefType resultMemrefType, ConversionPatternRewriter* rewriter) const { unsigned nloops = resultMemrefType.getRank(); SmallVector indexingMaps{ @@ -195,7 +195,7 @@ class BroadcastInDimConverter : public OpConversionPattern { } PatternMatchResult emitNonScalarBroadcast( - BroadcastInDimOp broadcastOp, ArrayRef args, + BroadcastInDimOp broadcastOp, ArrayRef args, MemRefType operandMemrefType, MemRefType resultMemrefType, ConversionPatternRewriter* rewriter) const { SmallVector bodyArgTypes{operandMemrefType.getElementType()}; @@ -250,10 +250,10 @@ class IotaConverter : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; PatternMatchResult matchAndRewrite( - IotaOp iotaOp, ArrayRef args, + IotaOp iotaOp, ArrayRef args, ConversionPatternRewriter& rewriter) const final { auto resultMemrefType = - iotaOp.getOperand()->getType().dyn_cast(); + 
iotaOp.getOperand().getType().dyn_cast(); if (!resultMemrefType) return matchFailure(); auto resultElementType = resultMemrefType.getElementType(); @@ -301,7 +301,7 @@ class ConstConverter : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; PatternMatchResult matchAndRewrite( - ConstOp constOp, ArrayRef args, + ConstOp constOp, ArrayRef args, ConversionPatternRewriter& rewriter) const final { auto loc = constOp.getLoc(); auto valueAttr = constOp.value().cast(); diff --git a/tensorflow/compiler/mlir/xla/transforms/lower_complex.cc b/tensorflow/compiler/mlir/xla/transforms/lower_complex.cc index e09350f4f74..672398672de 100644 --- a/tensorflow/compiler/mlir/xla/transforms/lower_complex.cc +++ b/tensorflow/compiler/mlir/xla/transforms/lower_complex.cc @@ -23,14 +23,14 @@ limitations under the License. #include #include "llvm/ADT/STLExtras.h" -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir -#include "mlir/IR/TypeUtilities.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassRegistry.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/PatternMatch.h" // TF:llvm-project +#include "mlir/IR/TypeUtilities.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassRegistry.h" // TF:llvm-project #include "tensorflow/compiler/mlir/xla/ir/hlo_ops.h" #include "tensorflow/compiler/mlir/xla/ir/hlo_utils.h" #include "tensorflow/compiler/mlir/xla/transforms/passes.h" diff --git a/tensorflow/compiler/mlir/xla/transforms/lower_general_dot.cc b/tensorflow/compiler/mlir/xla/transforms/lower_general_dot.cc index 515f818749e..c956cd6b277 100644 --- a/tensorflow/compiler/mlir/xla/transforms/lower_general_dot.cc +++ b/tensorflow/compiler/mlir/xla/transforms/lower_general_dot.cc @@ -17,15 +17,15 @@ limitations under the License. 
#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSwitch.h" -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/Operation.h" // TF:local_config_mlir -#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/TypeUtilities.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/Operation.h" // TF:llvm-project +#include "mlir/IR/PatternMatch.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/TypeUtilities.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project #include "tensorflow/compiler/mlir/xla/ir/hlo_ops.h" #include "tensorflow/compiler/mlir/xla/transforms/passes.h" #include "tensorflow/compiler/mlir/xla/transforms/rewriters.h" @@ -44,12 +44,12 @@ using mlir::Value; namespace { -Value *TransposeReshape(Value *arg, mlir::Location loc, - llvm::ArrayRef left_dims, - llvm::ArrayRef right_dims, - llvm::ArrayRef arg_shape, - PatternRewriter *rewriter) { - auto element_type = mlir::getElementTypeOrSelf(arg->getType()); +Value TransposeReshape(Value arg, mlir::Location loc, + llvm::ArrayRef left_dims, + llvm::ArrayRef right_dims, + llvm::ArrayRef arg_shape, + PatternRewriter *rewriter) { + auto element_type = mlir::getElementTypeOrSelf(arg.getType()); int64_t left_size = 1; for (auto dim : left_dims) { @@ -91,10 +91,10 @@ Value *TransposeReshape(Value *arg, mlir::Location loc, transpose_result); } -Value *ProcessDotArg(Value *arg, mlir::Location loc, - ElementsAttr contract_dims_attr, bool outer_dims_first, - PatternRewriter *rewriter) { - auto shape = arg->getType().cast().getShape(); +Value ProcessDotArg(Value arg, mlir::Location loc, + ElementsAttr contract_dims_attr, bool outer_dims_first, + PatternRewriter *rewriter) { + auto shape = arg.getType().cast().getShape(); llvm::SmallVector is_outer_dim; is_outer_dim.resize(shape.size(), true); @@ -154,8 +154,8 @@ struct GeneralDotConvert /*outer_dims_first=*/false, &rewriter); // Dot resulting shape. - auto lhs_shape = lhs->getType().cast().getShape(); - auto rhs_shape = rhs->getType().cast().getShape(); + auto lhs_shape = lhs.getType().cast().getShape(); + auto rhs_shape = rhs.getType().cast().getShape(); auto new_dot_type = RankedTensorType::get({lhs_shape[0], rhs_shape[1]}, dot_element_type); diff --git a/tensorflow/compiler/mlir/xla/transforms/map_lhlo_to_scalar_op.h b/tensorflow/compiler/mlir/xla/transforms/map_lhlo_to_scalar_op.h index 11e3af7649b..d61d3e35afc 100644 --- a/tensorflow/compiler/mlir/xla/transforms/map_lhlo_to_scalar_op.h +++ b/tensorflow/compiler/mlir/xla/transforms/map_lhlo_to_scalar_op.h @@ -18,7 +18,7 @@ limitations under the License. 
#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project #include "tensorflow/compiler/mlir/xla/ir/lhlo_ops.h" namespace mlir { @@ -40,7 +40,7 @@ struct ScalarOp { template <> struct ScalarOp { using FOp = ::mlir::DivFOp; - using IOp = ::mlir::DivISOp; + using IOp = ::mlir::SignedDivIOp; }; template <> struct ScalarOp { @@ -60,8 +60,8 @@ using ScalarIOp = typename ScalarOp::IOp; template Operation* MapLhloOpToStdScalarOp(LhloOp lhlo_op, ArrayRef result_types, - ArrayRef block_args, OpBuilder b) { - Type element_type = block_args.front()->getType(); + ArrayRef block_args, OpBuilder b) { + Type element_type = block_args.front().getType(); if (element_type.isa()) { return b.template create>(lhlo_op.getLoc(), result_types, block_args, mlir::None); @@ -76,10 +76,10 @@ Operation* MapLhloOpToStdScalarOp(LhloOp lhlo_op, ArrayRef result_types, template <> inline Operation* MapLhloOpToStdScalarOp( xla_lhlo::MaxOp lhlo_op, ArrayRef result_types, - ArrayRef block_args, OpBuilder b) { + ArrayRef block_args, OpBuilder b) { const auto& lhs = block_args[0]; const auto& rhs = block_args[1]; - Type element_type = lhs->getType(); + Type element_type = lhs.getType(); if (element_type.isa()) { auto lhs_gt_rhs = b.create>( lhlo_op.getLoc(), CmpIPredicate::sgt, lhs, rhs); @@ -96,10 +96,10 @@ inline Operation* MapLhloOpToStdScalarOp( template <> inline Operation* MapLhloOpToStdScalarOp( xla_lhlo::MinOp lhlo_op, ArrayRef result_types, - ArrayRef block_args, OpBuilder b) { + ArrayRef block_args, OpBuilder b) { const auto& lhs = block_args[0]; const auto& rhs = block_args[1]; - Type element_type = lhs->getType(); + Type element_type = lhs.getType(); if (element_type.isa()) { auto lhs_lt_rhs = b.create>( lhlo_op.getLoc(), CmpIPredicate::slt, lhs, rhs); @@ -116,8 +116,8 @@ inline Operation* MapLhloOpToStdScalarOp( template <> inline Operation* MapLhloOpToStdScalarOp( xla_lhlo::AndOp lhlo_op, ArrayRef result_types, - ArrayRef block_args, OpBuilder b) { - Type element_type = block_args.front()->getType(); + ArrayRef block_args, OpBuilder b) { + Type element_type = block_args.front().getType(); return element_type.isa() ? b.create<::mlir::AndOp>(lhlo_op.getLoc(), result_types, block_args, mlir::None) @@ -150,10 +150,10 @@ inline Optional getIntCmpPredicate( template <> inline Operation* MapLhloOpToStdScalarOp( xla_lhlo::CompareOp lhlo_op, ArrayRef result_types, - ArrayRef block_args, OpBuilder b) { + ArrayRef block_args, OpBuilder b) { const auto& lhs = block_args[0]; const auto& rhs = block_args[1]; - Type element_type = lhs->getType(); + Type element_type = lhs.getType(); if (element_type.isa()) { Optional predicate = getIntCmpPredicate(lhlo_op.comparison_direction()); @@ -172,7 +172,7 @@ inline Operation* MapLhloOpToStdScalarOp( template <> inline Operation* MapLhloOpToStdScalarOp( xla_lhlo::SelectOp lhlo_op, ArrayRef result_types, - ArrayRef block_args, OpBuilder b) { + ArrayRef block_args, OpBuilder b) { return b.create<::mlir::SelectOp>(lhlo_op.getLoc(), result_types, block_args, mlir::None); } @@ -180,8 +180,8 @@ inline Operation* MapLhloOpToStdScalarOp( template <> inline Operation* MapLhloOpToStdScalarOp( xla_lhlo::ExpOp lhlo_op, ArrayRef result_types, - ArrayRef block_args, OpBuilder b) { - Type element_type = block_args.front()->getType(); + ArrayRef block_args, OpBuilder b) { + Type element_type = block_args.front().getType(); return element_type.isa() ? 
b.create<::mlir::ExpOp>(lhlo_op.getLoc(), result_types, block_args, mlir::None) diff --git a/tensorflow/compiler/mlir/xla/transforms/passes.h b/tensorflow/compiler/mlir/xla/transforms/passes.h index d659a3a87f4..21d1f08f3ea 100644 --- a/tensorflow/compiler/mlir/xla/transforms/passes.h +++ b/tensorflow/compiler/mlir/xla/transforms/passes.h @@ -18,8 +18,8 @@ limitations under the License. #include -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/Support/LogicalResult.h" // TF:llvm-project namespace mlir { diff --git a/tensorflow/compiler/mlir/xla/transforms/rewriters.h b/tensorflow/compiler/mlir/xla/transforms/rewriters.h index e4a014f137f..5f546d4651e 100644 --- a/tensorflow/compiler/mlir/xla/transforms/rewriters.h +++ b/tensorflow/compiler/mlir/xla/transforms/rewriters.h @@ -18,8 +18,8 @@ limitations under the License. #include -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/PatternMatch.h" // TF:llvm-project namespace mlir { namespace xla_hlo { diff --git a/tensorflow/compiler/mlir/xla/type_to_shape.cc b/tensorflow/compiler/mlir/xla/type_to_shape.cc index 37c657c99ae..d82b2d33779 100644 --- a/tensorflow/compiler/mlir/xla/type_to_shape.cc +++ b/tensorflow/compiler/mlir/xla/type_to_shape.cc @@ -17,11 +17,11 @@ limitations under the License. #include -#include "mlir/IR/AffineMap.h" // TF:local_config_mlir -#include "mlir/IR/Diagnostics.h" // TF:local_config_mlir -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/Support/DebugStringHelper.h" // TF:local_config_mlir +#include "mlir/IR/AffineMap.h" // TF:llvm-project +#include "mlir/IR/Diagnostics.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/Support/DebugStringHelper.h" // TF:llvm-project #include "tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.h" #include "tensorflow/compiler/mlir/tensorflow/utils/convert_type.h" #include "tensorflow/compiler/mlir/xla/ir/hlo_ops.h" diff --git a/tensorflow/compiler/mlir/xla/type_to_shape.h b/tensorflow/compiler/mlir/xla/type_to_shape.h index 4bc3fac9b1c..c9989def939 100644 --- a/tensorflow/compiler/mlir/xla/type_to_shape.h +++ b/tensorflow/compiler/mlir/xla/type_to_shape.h @@ -17,7 +17,7 @@ limitations under the License. #define TENSORFLOW_COMPILER_MLIR_XLA_TYPE_TO_SHAPE_H_ #include "llvm/ADT/STLExtras.h" -#include "mlir/IR/Types.h" // TF:local_config_mlir +#include "mlir/IR/Types.h" // TF:llvm-project #include "tensorflow/compiler/xla/shape.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/framework/tensor_shape.h" diff --git a/tensorflow/compiler/mlir/xla/type_to_shape_test.cc b/tensorflow/compiler/mlir/xla/type_to_shape_test.cc index fc4eea79347..98f9b36c84b 100644 --- a/tensorflow/compiler/mlir/xla/type_to_shape_test.cc +++ b/tensorflow/compiler/mlir/xla/type_to_shape_test.cc @@ -17,9 +17,9 @@ limitations under the License. 
#include -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/test.h" #include "tensorflow/compiler/xla/xla_data.pb.h" diff --git a/tensorflow/compiler/mlir/xla/xla_mlir_translate.cc b/tensorflow/compiler/mlir/xla/xla_mlir_translate.cc index e79c03447c8..16be296ce6c 100644 --- a/tensorflow/compiler/mlir/xla/xla_mlir_translate.cc +++ b/tensorflow/compiler/mlir/xla/xla_mlir_translate.cc @@ -17,8 +17,8 @@ limitations under the License. #include "llvm/Support/CommandLine.h" #include "llvm/Support/MemoryBuffer.h" -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/Translation.h" // TF:local_config_mlir +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/Translation.h" // TF:llvm-project #include "tensorflow/compiler/mlir/xla/hlo_to_mlir_hlo.h" #include "tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.h" #include "tensorflow/compiler/xla/debug_options_flags.h" diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index 01a0f0a86f2..4c3dcd81eb7 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -316,6 +316,11 @@ tf_xla_py_test( timeout = "moderate", srcs = ["matrix_inverse_op_test.py"], python_version = "PY3", + tags = [ + "noasan", + "nomsan", + "notsan", + ], deps = [ ":xla_test", "//tensorflow/python:array_ops", diff --git a/tensorflow/compiler/tests/dense_layer_test.py b/tensorflow/compiler/tests/dense_layer_test.py index 8020aa28ce4..8e653d2511c 100644 --- a/tensorflow/compiler/tests/dense_layer_test.py +++ b/tensorflow/compiler/tests/dense_layer_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import os + import numpy as np from tensorflow.compiler.tests import test_utils diff --git a/tensorflow/compiler/tests/depthwise_conv_op_test.py b/tensorflow/compiler/tests/depthwise_conv_op_test.py index a49985f0446..0f0ea50fde9 100644 --- a/tensorflow/compiler/tests/depthwise_conv_op_test.py +++ b/tensorflow/compiler/tests/depthwise_conv_op_test.py @@ -68,21 +68,21 @@ def ConfigsToTest(): Tuple (input_size, filter_size, out_size, stride, padding), the depthwise convolution parameters. 
""" - input_sizes = [[4, 5, 5, 48], [4, 8, 8, 84], [4, 17, 17, 48], [4, 9, 27, 8], - [4, 31, 31, 7], [4, 35, 35, 2], [4, 147, 147, 2], - [3, 299, 299, 3], [5, 183, 183, 1]] - filter_sizes = [[1, 1, 48, 2], [1, 3, 84, 1], [3, 1, 48, 4], [3, 3, 8, 1], - [3, 3, 7, 1], [5, 5, 2, 1], [3, 3, 2, 8], [2, 2, 3, - 8], [5, 5, 1, 2]] - out_sizes = [[4, 5, 5, 96], [4, 8, 8, 84], [4, 17, 17, 192], [4, 9, 27, 8], - [4, 31, 31, 7], [4, 35, 35, 2], [4, 49, 49, 16], + input_sizes = [[4, 5, 5, 48], [2, 5, 5, 48], [4, 8, 8, 84], [4, 17, 17, 48], + [4, 9, 27, 8], [4, 31, 31, 7], [4, 35, 35, 2], + [4, 147, 147, 2], [3, 299, 299, 3], [5, 183, 183, 1]] + filter_sizes = [[1, 1, 48, 2], [2, 2, 48, 8], [1, 3, 84, 1], [3, 1, 48, 4], + [3, 3, 8, 1], [3, 3, 7, 1], [5, 5, 2, 1], [3, 3, 2, 8], + [2, 2, 3, 8], [5, 5, 1, 2]] + out_sizes = [[4, 5, 5, 96], [2, 5, 5, 384], [4, 8, 8, 84], [4, 17, 17, 192], + [4, 9, 27, 8], [4, 31, 31, 7], [4, 35, 35, 2], [4, 49, 49, 16], [3, 150, 150, 24], [5, 92, 92, 2]] - strides = [1, 1, 1, 1, 1, 1, 3, 2, 2] + strides = [1, 1, 1, 1, 1, 1, 1, 3, 2, 2] # pylint: disable=invalid-name VALID = "VALID" SAME = "SAME" # pylint: enable=invalid-name - paddings = [SAME, SAME, SAME, SAME, SAME, SAME, VALID, SAME, SAME, SAME] + paddings = [SAME, SAME, SAME, SAME, SAME, SAME, SAME, VALID, SAME, SAME, SAME] for i, f, o, s, p in zip(input_sizes, filter_sizes, out_sizes, strides, paddings): yield i, f, o, s, p diff --git a/tensorflow/compiler/tests/jit_test.py b/tensorflow/compiler/tests/jit_test.py index 109a7932c20..3bde1574f0e 100644 --- a/tensorflow/compiler/tests/jit_test.py +++ b/tensorflow/compiler/tests/jit_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import os + import numpy as np from tensorflow.compiler.tests import test_utils diff --git a/tensorflow/compiler/tests/matrix_diag_ops_test.py b/tensorflow/compiler/tests/matrix_diag_ops_test.py index 1ca9b157fa1..4c03211da5a 100644 --- a/tensorflow/compiler/tests/matrix_diag_ops_test.py +++ b/tensorflow/compiler/tests/matrix_diag_ops_test.py @@ -21,19 +21,10 @@ from __future__ import print_function import numpy as np from tensorflow.compiler.tests import xla_test -from tensorflow.python.compat import compat from tensorflow.python.ops import array_ops from tensorflow.python.platform import googletest -# LINT.IfChange -matrix_diag_v3_forward_compat_date = (2019, 12, 6) -# LINT.ThenChange( -# //tensorflow/python/kernel_tests/diag_op_test.py, -# //tensorflow/python/ops/array_ops.py, -# //tensorflow/python/ops/parallel_for/array_test.py -# ) - default_v2_alignment = "LEFT_LEFT" alignment_list = ["RIGHT_LEFT", "LEFT_RIGHT"] @@ -404,25 +395,20 @@ class MatrixDiagTest(xla_test.XLATestCase): # From here onwards are v2-only tests. 
def testSquare(self): - if compat.forward_compatible(*matrix_diag_v3_forward_compat_date): - for align in alignment_list: - for _, tests in [square_cases(align)]: - for diag_index, (vecs, solution) in tests.items(): - params = {"diagonal": vecs[0], "k": diag_index, "align": align} - self._assertOpOutputMatchesExpected(params, solution[0]) + for align in alignment_list: + for _, tests in [square_cases(align)]: + for diag_index, (vecs, solution) in tests.items(): + params = {"diagonal": vecs[0], "k": diag_index, "align": align} + self._assertOpOutputMatchesExpected(params, solution[0]) def testSquareBatch(self): - if compat.forward_compatible(*matrix_diag_v3_forward_compat_date): - for align in alignment_list: - for _, tests in [square_cases(align)]: - for diag_index, (vecs, solution) in tests.items(): - params = {"diagonal": vecs, "k": diag_index, "align": align} - self._assertOpOutputMatchesExpected(params, solution) + for align in alignment_list: + for _, tests in [square_cases(align)]: + for diag_index, (vecs, solution) in tests.items(): + params = {"diagonal": vecs, "k": diag_index, "align": align} + self._assertOpOutputMatchesExpected(params, solution) def testRectangularBatch(self): - if not compat.forward_compatible(*matrix_diag_v3_forward_compat_date): - return - # Stores expected num_rows and num_cols (when the other is given). # expected[(d_lower, d_upper)] = (expected_num_rows, expected_num_cols) test_list = list() @@ -513,22 +499,21 @@ class MatrixDiagTest(xla_test.XLATestCase): }, solution_given_num_cols) def testPadding(self): - if compat.forward_compatible(*matrix_diag_v3_forward_compat_date): - for padding_value, align in zip_to_first_list_length([555, -11], - alignment_list): - for _, tests in all_tests(align): - for diag_index, (vecs, solution) in tests.items(): - mask = (solution == 0) - solution = solution + (mask * padding_value) - self._assertOpOutputMatchesExpected( - { - "diagonal": vecs, - "k": diag_index, - "num_rows": solution.shape[-2], - "num_cols": solution.shape[-1], - "padding_value": padding_value, - "align": align - }, solution) + for padding_value, align in zip_to_first_list_length([555, -11], + alignment_list): + for _, tests in all_tests(align): + for diag_index, (vecs, solution) in tests.items(): + mask = (solution == 0) + solution = solution + (mask * padding_value) + self._assertOpOutputMatchesExpected( + { + "diagonal": vecs, + "k": diag_index, + "num_rows": solution.shape[-2], + "num_cols": solution.shape[-1], + "padding_value": padding_value, + "align": align + }, solution) class MatrixSetDiagTest(xla_test.XLATestCase): @@ -634,36 +619,34 @@ class MatrixSetDiagTest(xla_test.XLATestCase): # From here onwards are v2-only tests. 
def testSingleMatrix(self): - if compat.forward_compatible(*matrix_diag_v3_forward_compat_date): - for align in alignment_list: - for _, tests in all_tests(align): - for diag_index, (vecs, banded_mat) in tests.items(): - mask = (banded_mat[0] == 0) - input_mat = np.random.randint(10, size=mask.shape) - solution = input_mat * mask + banded_mat[0] - self._assertOpOutputMatchesExpected( - { - "input": input_mat, - "diagonal": vecs[0], - "k": diag_index, - "align": align - }, solution) + for align in alignment_list: + for _, tests in all_tests(align): + for diag_index, (vecs, banded_mat) in tests.items(): + mask = (banded_mat[0] == 0) + input_mat = np.random.randint(10, size=mask.shape) + solution = input_mat * mask + banded_mat[0] + self._assertOpOutputMatchesExpected( + { + "input": input_mat, + "diagonal": vecs[0], + "k": diag_index, + "align": align + }, solution) def testBatch(self): - if compat.forward_compatible(*matrix_diag_v3_forward_compat_date): - for align in alignment_list: - for _, tests in all_tests(align): - for diag_index, (vecs, banded_mat) in tests.items(): - mask = (banded_mat == 0) - input_mat = np.random.randint(10, size=mask.shape) - solution = input_mat * mask + banded_mat - self._assertOpOutputMatchesExpected( - { - "input": input_mat, - "diagonal": vecs, - "k": diag_index, - "align": align - }, solution) + for align in alignment_list: + for _, tests in all_tests(align): + for diag_index, (vecs, banded_mat) in tests.items(): + mask = (banded_mat == 0) + input_mat = np.random.randint(10, size=mask.shape) + solution = input_mat * mask + banded_mat + self._assertOpOutputMatchesExpected( + { + "input": input_mat, + "diagonal": vecs, + "k": diag_index, + "align": align + }, solution) class MatrixDiagPartTest(xla_test.XLATestCase): @@ -705,45 +688,42 @@ class MatrixDiagPartTest(xla_test.XLATestCase): # From here onwards are v2-only tests. 
def testSingleMatrix(self): - if compat.forward_compatible(*matrix_diag_v3_forward_compat_date): - for align in alignment_list: - test_list = [square_cases(align), tall_cases(align), fat_cases(align)] - for mat, tests in test_list: - for diag_index, (solution, _) in tests.items(): - self._assertOpOutputMatchesExpected( - { - "input": mat[0], - "k": diag_index, - "align": align - }, solution[0]) + for align in alignment_list: + test_list = [square_cases(align), tall_cases(align), fat_cases(align)] + for mat, tests in test_list: + for diag_index, (solution, _) in tests.items(): + self._assertOpOutputMatchesExpected( + { + "input": mat[0], + "k": diag_index, + "align": align + }, solution[0]) def testBatch(self): - if compat.forward_compatible(*matrix_diag_v3_forward_compat_date): - for align in alignment_list: - for mat, tests in all_tests(align): - for diag_index, (solution, _) in tests.items(): - self._assertOpOutputMatchesExpected( - { - "input": mat, - "k": diag_index, - "align": align - }, solution) + for align in alignment_list: + for mat, tests in all_tests(align): + for diag_index, (solution, _) in tests.items(): + self._assertOpOutputMatchesExpected( + { + "input": mat, + "k": diag_index, + "align": align + }, solution) def testPadding(self): - if compat.forward_compatible(*matrix_diag_v3_forward_compat_date): - for padding_value, align in zip_to_first_list_length([555, -11], - alignment_list): - for mat, tests in all_tests(align): - for diag_index, (solution, _) in tests.items(): - mask = (solution == 0) - solution = solution + (mask * padding_value) - self._assertOpOutputMatchesExpected( - { - "input": mat, - "k": diag_index, - "padding_value": padding_value, - "align": align - }, solution) + for padding_value, align in zip_to_first_list_length([555, -11], + alignment_list): + for mat, tests in all_tests(align): + for diag_index, (solution, _) in tests.items(): + mask = (solution == 0) + solution = solution + (mask * padding_value) + self._assertOpOutputMatchesExpected( + { + "input": mat, + "k": diag_index, + "padding_value": padding_value, + "align": align + }, solution) if __name__ == "__main__": diff --git a/tensorflow/compiler/tests/quantized_ops_test.py b/tensorflow/compiler/tests/quantized_ops_test.py index 100be3b9aa5..5d4fb39f2ea 100644 --- a/tensorflow/compiler/tests/quantized_ops_test.py +++ b/tensorflow/compiler/tests/quantized_ops_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import math + import numpy as np from tensorflow.compiler.tests import xla_test diff --git a/tensorflow/compiler/tests/reduce_ops_test.py b/tensorflow/compiler/tests/reduce_ops_test.py index a39f633858a..57709c2cd10 100644 --- a/tensorflow/compiler/tests/reduce_ops_test.py +++ b/tensorflow/compiler/tests/reduce_ops_test.py @@ -20,6 +20,7 @@ from __future__ import print_function import functools import itertools + from absl.testing import parameterized import numpy as np diff --git a/tensorflow/compiler/tests/reverse_ops_test.py b/tensorflow/compiler/tests/reverse_ops_test.py index 7dc323b0ab5..abfb73ade38 100644 --- a/tensorflow/compiler/tests/reverse_ops_test.py +++ b/tensorflow/compiler/tests/reverse_ops_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import itertools + import numpy as np from tensorflow.compiler.tests import xla_test diff --git a/tensorflow/compiler/tests/segment_reduction_ops_test.py b/tensorflow/compiler/tests/segment_reduction_ops_test.py index 500617bc38b..ae86b6c30da 100644 
--- a/tensorflow/compiler/tests/segment_reduction_ops_test.py +++ b/tensorflow/compiler/tests/segment_reduction_ops_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import functools + import numpy as np from tensorflow.compiler.tests import xla_test diff --git a/tensorflow/compiler/tests/self_adjoint_eig_op_test.py b/tensorflow/compiler/tests/self_adjoint_eig_op_test.py index 0c1a1d145d4..9507a8c9c92 100644 --- a/tensorflow/compiler/tests/self_adjoint_eig_op_test.py +++ b/tensorflow/compiler/tests/self_adjoint_eig_op_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import itertools + from absl.testing import parameterized import numpy as np diff --git a/tensorflow/compiler/tests/svd_op_test.py b/tensorflow/compiler/tests/svd_op_test.py index 7791b409a37..7e05eeb4c0a 100644 --- a/tensorflow/compiler/tests/svd_op_test.py +++ b/tensorflow/compiler/tests/svd_op_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import itertools + from absl.testing import parameterized import numpy as np diff --git a/tensorflow/compiler/tests/tensor_list_ops_test.py b/tensorflow/compiler/tests/tensor_list_ops_test.py index 7d2425ee205..d49a6a37785 100644 --- a/tensorflow/compiler/tests/tensor_list_ops_test.py +++ b/tensorflow/compiler/tests/tensor_list_ops_test.py @@ -19,6 +19,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function import os + from absl.testing import parameterized import numpy as np from tensorflow.compiler.tests import xla_test diff --git a/tensorflow/compiler/tests/while_test.py b/tensorflow/compiler/tests/while_test.py index 3ef12ced704..420dc04bec3 100644 --- a/tensorflow/compiler/tests/while_test.py +++ b/tensorflow/compiler/tests/while_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import os + import numpy as np from tensorflow.compiler.tests import xla_test diff --git a/tensorflow/compiler/tf2tensorrt/BUILD b/tensorflow/compiler/tf2tensorrt/BUILD index f6e9780eabc..65679bd021a 100644 --- a/tensorflow/compiler/tf2tensorrt/BUILD +++ b/tensorflow/compiler/tf2tensorrt/BUILD @@ -500,7 +500,8 @@ cc_library( deps = [ "//tensorflow/core:framework", "//tensorflow/core:lib_proto_parsing", - ], + "//tensorflow/core:lib", + ] + if_tensorrt([":tensorrt_lib"]), ) tf_proto_library( diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc index 855e5d4285f..4e76287a953 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc @@ -51,6 +51,7 @@ limitations under the License. #include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/platform/tensor_coding.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/public/version.h" #include "tensorflow/core/util/strided_slice_op.h" #if GOOGLE_CUDA @@ -200,18 +201,6 @@ int64 TFAttrs::get(const string& key) const { return this->at(key)->i(); } -template -inline nvinfer1::Dims TensorShapeToTrtDims(const TensorShapeType& shape, - bool ignore_first_dim) { - nvinfer1::Dims trt_dims; - const int offset = (ignore_first_dim ? 
1 : 0); - for (int i = offset; i < shape.dims(); i++) { - trt_dims.d[i - offset] = shape.dim_size(i); - } - trt_dims.nbDims = shape.dims() - offset; - return trt_dims; -} - template Status TensorShapeArrayToTrtDims(const Container& shape, nvinfer1::Dims* out, bool ignore_first_dim = false) { @@ -286,7 +275,7 @@ Status ValidateTensorProperties(const string& producer_node_type, } *trt_dims = TensorShapeToTrtDims(shape, /*ignore_first_dim=*/use_implicit_batch); - // Get batch size for tensor if it will not be included the the shape. + // Get batch size for tensor if it will not be included the shape. if (use_implicit_batch) { *batch_size = shape.dim_size(0); } @@ -314,66 +303,6 @@ Status ValidateTensorProperties(const string& producer_node_type, return Status::OK(); } -string DebugString(const nvinfer1::DimensionType type) { - switch (type) { - case nvinfer1::DimensionType::kSPATIAL: - return "kSPATIAL"; - case nvinfer1::DimensionType::kCHANNEL: - return "kCHANNEL"; - case nvinfer1::DimensionType::kINDEX: - return "kINDEX"; - case nvinfer1::DimensionType::kSEQUENCE: - return "kSEQUENCE"; - default: - return StrCat(static_cast(type), "=unknown"); - } -} - -string DebugString(const nvinfer1::DataType trt_dtype) { - switch (trt_dtype) { - case nvinfer1::DataType::kFLOAT: - return "kFLOAT"; - case nvinfer1::DataType::kHALF: - return "kHALF"; - case nvinfer1::DataType::kINT8: - return "kINT8"; - case nvinfer1::DataType::kINT32: - return "kINT32"; - default: - return "Invalid TRT data type"; - } -} - -string DebugString(const nvinfer1::Dims& dims) { - string out = StrCat("nvinfer1::Dims(nbDims=", dims.nbDims, ", d="); - for (int i = 0; i < dims.nbDims; ++i) { - StrAppend(&out, dims.d[i]); - if (VLOG_IS_ON(2)) { - StrAppend(&out, "[", DebugString(dims.type[i]), "],"); - } else { - StrAppend(&out, ","); - } - } - StrAppend(&out, ")"); - return out; -} - -string DebugString(const nvinfer1::Permutation& permutation, int len) { - string out = "nvinfer1::Permutation("; - for (int i = 0; i < len; ++i) { - StrAppend(&out, permutation.order[i], ","); - } - StrAppend(&out, ")"); - return out; -} - -string DebugString(const nvinfer1::ITensor& tensor) { - return StrCat("nvinfer1::ITensor(@", reinterpret_cast(&tensor), - ", name=", tensor.getName(), - ", dtype=", DebugString(tensor.getType()), - ", dims=", DebugString(tensor.getDimensions()), ")"); -} - Status GetTrtBroadcastShape(const TRT_TensorOrWeights& operand_l, const TRT_TensorOrWeights& operand_r, const bool check_feasibility, @@ -581,14 +510,6 @@ inline nvinfer1::Dims GetTrtDimsForTensor(const Tensor& tensor) { return dims; } -inline bool HasStaticShape(const nvinfer1::Dims& dims) { - if (dims.nbDims < 0) return false; - for (int d = 0; d < dims.nbDims; ++d) { - if (dims.d[d] < 0) return false; - } - return true; -} - int64_t Prod(const nvinfer1::Dims& dims) { int64_t count = 1; for (int d = 0; d < dims.nbDims; ++d) { @@ -732,9 +653,10 @@ size_t TRT_ShapedWeights::size_bytes() const { } string TRT_ShapedWeights::DebugString() const { - return StrCat("TRT_ShapedWeights(shape=", convert::DebugString(shape_), - ", type=", convert::DebugString(type_), - ", values=", reinterpret_cast(GetValues()), ")"); + return StrCat( + "TRT_ShapedWeights(shape=", tensorflow::tensorrt::DebugString(shape_), + ", type=", tensorflow::tensorrt::DebugString(type_), + ", values=", reinterpret_cast(GetValues()), ")"); } // A fake ITensor implementation used to check whether the TF-TRT converter can @@ -858,7 +780,7 @@ nvinfer1::Dims TRT_TensorOrWeights::GetTrtDims() const { 
string TRT_TensorOrWeights::DebugString() const { string output = "TRT_TensorOrWeights(type="; if (is_tensor()) { - StrAppend(&output, "tensor=", convert::DebugString(*tensor()), + StrAppend(&output, "tensor=", tensorflow::tensorrt::DebugString(*tensor()), ", batch_size=", batch_size_); } else { StrAppend(&output, "weights=", weights_.DebugString()); @@ -1210,11 +1132,8 @@ static void InitializeTrtPlugins(nvinfer1::ILogger* trt_logger) { mutex_lock lock(plugin_mutex); if (plugin_initialized) return; - LOG(INFO) << "Linked TensorRT version: " << NV_TENSORRT_MAJOR << "." - << NV_TENSORRT_MINOR << "." << NV_TENSORRT_PATCH; - const int loaded_version = getInferLibVersion(); - LOG(INFO) << "Loaded TensorRT version: " << loaded_version / 1000 << "." - << (loaded_version / 100) % 10 << "." << loaded_version % 100; + LOG(INFO) << "Linked TensorRT version: " << GetLinkedTensorRTVersion(); + LOG(INFO) << "Loaded TensorRT version: " << GetLoadedTensorRTVersion(); plugin_initialized = initLibNvInferPlugins(trt_logger, ""); if (!plugin_initialized) { @@ -1451,6 +1370,19 @@ Status Converter::BuildCudaEngine( } } +#if IS_TRT_VERSION_GE(6, 0, 0, 0) + string precision_mode_str; + TF_RETURN_IF_ERROR( + TrtPrecisionModeToName(precision_mode_, &precision_mode_str)); + string trt_network_name = StrCat( + "TF:", TF_VERSION_STRING, ", ", "TRT:", GetLoadedTensorRTVersion(), "-", + "Precision:", precision_mode_str, ", ", "Calibration:", use_calibration_, + ", ", "Max-Batch-Size:", max_batch_size, ", ", + "Max-Workspace-Size:", max_workspace_size_bytes); + VLOG(1) << "Setting TensorRT network name to " << trt_network_name; + network()->setName(trt_network_name.c_str()); +#endif // #if IS_TRT_VERSION_GE(6, 0, 0, 0) + VLOG(1) << "Building TensorRT engine"; engine->reset(trt_builder_->buildCudaEngine(*network())); #endif @@ -2230,7 +2162,37 @@ Status ConvertConv2DHelper(OpConverterParams* params, int group, conv_layer = layer; } nvinfer1::ITensor* output_tensor = conv_layer->getOutput(0); - + // Add an extra padding for Deconv because TRT doesn't accept the + // argument output_shape and thus the TRT output shape could be wrong + // in case of strides>1. + if (is_conv2d_backprop_input) { + auto tf_output_shape = + static_cast(backprop_output_size.weights().GetValues()); + nvinfer1::Dims trt_output_shape = output_tensor->getDimensions(); + // What determines the padding size is the difference between the given + // input_sizes (tf_output_shape) and TRT computed size. + const int height_diff = tf_output_shape[h_index] - trt_output_shape.d[1]; + const int width_diff = tf_output_shape[w_index] - trt_output_shape.d[2]; + if ((height_diff < 0) || (width_diff < 0)) { + return errors::InvalidArgument( + "input_sizes argument of Conv2DBackprop (i.e. output_shape argument " + "of conv2d_transpose) ", + "is too small for the given out_backprop argument of Conv2DBackprop " + "(i.e. input argument of conv2d_transpose). Expect: ", + "(", tf_output_shape[h_index], ", ", tf_output_shape[w_index], + ") >= ", "(", trt_output_shape.d[1], ", ", trt_output_shape.d[2], + ") for op ", node_def.name()); + } + // Only add a padding layer if padding sizes are larger than 0 + if ((height_diff > 0) || (width_diff > 0)) { + nvinfer1::DimsHW pre_padding(0, 0); + nvinfer1::DimsHW post_padding(height_diff, width_diff); + nvinfer1::IPaddingLayer* padding_layer = + params->converter->network()->addPadding(*output_tensor, pre_padding, + post_padding); + output_tensor = padding_layer->getOutput(0); + } + } // Restore transpose. 
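// Worked example for the deconvolution padding added above (illustrative,
// matching the "Transpose Strided NHWC with VALID padding" test case later
// in this change): for conv2d_transpose with input height 3, kernel height
// 2, stride 2, and VALID padding, TRT computes an output height of
// (3 - 1) * 2 + 2 = 6, while TF's input_sizes argument requests 7; so
// height_diff = 7 - 6 = 1 and one row of zero post-padding is appended to
// reproduce TensorFlow's expected output shape.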
   if (need_transpose) {
     TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
@@ -5145,6 +5107,17 @@ Status ConvertTopK(OpConverterParams* params) {
       CheckInputsWeights(*params, {{"input", false}, {"k", true}}));
   TF_RETURN_IF_ERROR(
       AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
+  TFAttrs attrs(node_def);
+  const bool sorted = attrs.get<bool>("sorted");
+  if (!sorted) {
+    // TensorRT only supports sorted output. Although the TensorFlow API
+    // doesn't specify the order of output elements when sorted=false, it's
+    // safer not to convert, because the TensorRT output might differ from
+    // the TensorFlow output and cause confusion.
+    return errors::InvalidArgument("Only sorted=True is supported, at ",
+                                   node_def.name());
+  }
+
   nvinfer1::ITensor* tensor = inputs.at(0).tensor();
   const int num_dims = tensor->getDimensions().nbDims;
   if (num_dims == 0) {
diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h
index 6fb3620bf81..a9f579c9ed7 100644
--- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h
+++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h
@@ -42,14 +42,6 @@ namespace tensorrt {
 namespace convert {
 using ::stream_executor::port::StatusOr;
-#define IS_TRT_VERSION_GE(major, minor, patch, build)           \
-  ((NV_TENSORRT_MAJOR > major) ||                               \
-   (NV_TENSORRT_MAJOR == major && NV_TENSORRT_MINOR > minor) || \
-   (NV_TENSORRT_MAJOR == major && NV_TENSORRT_MINOR == minor && \
-    NV_TENSORRT_PATCH > patch) ||                               \
-   (NV_TENSORRT_MAJOR == major && NV_TENSORRT_MINOR == minor && \
-    NV_TENSORRT_PATCH == patch && NV_TENSORRT_BUILD >= build))
-
 struct EngineConnection {
   // Constructs a non-control edge.
   EngineConnection(const string& outside, int out_id, int out_port,
@@ -164,11 +156,6 @@ class OutputEdgeValidator {
   bool operator()(const Edge* out_edge) const;
 };
-string DebugString(const nvinfer1::DimensionType type);
-string DebugString(const nvinfer1::DataType trt_dtype);
-string DebugString(const nvinfer1::Dims& dims);
-string DebugString(const nvinfer1::Permutation& permutation, int len);
-string DebugString(const nvinfer1::ITensor& tensor);
 int64_t TrtWeightDimsNumElements(const nvinfer1::Dims& dims);
 int64_t TrtTensorDimsNumElements(const nvinfer1::Dims& dims);
@@ -341,7 +328,7 @@ class TRT_TensorOrWeights {
   // size represented in the shapes or the batch sizes are different. See
   // b/118387490 for more details.
   //
-  // if use_implicit_batch is false, batch_size_ is unused and
+  // If use_implicit_batch is false, batch_size_ is unused and
   // tensor_->getDimensions() will contain the entire shape (A,B,C).
   int batch_size_ = -1;
diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc
index 358004abac7..fa361c29933 100644
--- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc
+++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc
@@ -1714,15 +1714,14 @@ TEST_F(OpConverterTest, ConvertReshape) {
   };
   // Reshape at batch dimension, should fail.
-  const int kReshapeBatchDimsCases = 5;
-  TestParams params[kReshapeBatchDimsCases] = {
+  std::vector<TestParams> params = {
       TestParams{1, {1, 2, 3}, {3, 1, 1, 2}},
       TestParams{1, {1, 2, -1}, {-1, 1, 1, 2}},
       TestParams{1, {1, 2, 3}, {-1, 1, 1, 2}},
       TestParams{-1, {1, 2, 3}, {1, 1, 1, 2}},
      TestParams{-1, {-1, 2, 3}, {1, 1, 1, 6}},  // TODO(laigd): it should pass.
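      // Note (illustrative): in implicit-batch mode the TRT tensor dimensions
      // exclude the batch dimension, so the converter rejects reshapes that
      // touch the batch size -- which is what each case above does (the TODO
      // marks one that is rejected today but could in principle be allowed).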
}; - for (int i = 0; i < kReshapeBatchDimsCases; ++i) { + for (int i = 0; i < params.size(); ++i) { Reset(); const std::vector& dims = params[i].tensor_dims; AddTestTensor("input", dims, params[i].batch_size); @@ -1734,8 +1733,7 @@ TEST_F(OpConverterTest, ConvertReshape) { } // Reshape on non batch dimensions, ok. - const int kReshapeOKCases = 8; - TestParams ok_params[kReshapeOKCases] = { + std::vector ok_params = { TestParams{-1, {1, 2, 3}, {-1, 1, 3, 2}}, TestParams{1, {1, 2, 3}, {-1, 1, 3, 2}}, TestParams{1, {1, 2, 3}, {1, 1, 3, 2}}, @@ -1745,7 +1743,7 @@ TEST_F(OpConverterTest, ConvertReshape) { TestParams{2, {1, 1}, {2}}, TestParams{2, {}, {2, 1}}, }; - for (int i = 0; i < kReshapeOKCases; ++i) { + for (int i = 0; i < ok_params.size(); ++i) { const int batch_size = std::max(1, ok_params[i].batch_size); const auto& shape = ok_params[i].shape; Reset(); @@ -2549,14 +2547,13 @@ TEST_F(OpConverterTest, ConvertCombinedNMS) { }; // Ok. - const int kCombinedNMSOKCases = 1; - TestParams ok_params[kCombinedNMSOKCases] = { + std::vector ok_params = { // TODO(aaroey): there is a bug in TRT's CombinedNonMaxSuppression // implementation that, the extra output classes that are outside of the // range specified by valid_detections[i] are not zeros but -1s. TestParams{{1, 1, 4}, {1, 3}, 3, 2, .5f, 0, {2, 4}, {2}, {2}}}; - for (int i = 0; i < kCombinedNMSOKCases; ++i) { + for (int i = 0; i < ok_params.size(); ++i) { Reset(); AddTestTensor("boxes", ok_params[i].boxes_tensor_dims); @@ -2814,14 +2811,13 @@ TEST_F(OpConverterTest, ConvertExpandDims) { }; // Ok. - const int kExpandDimsOKCases = 8; - TestParams ok_params[kExpandDimsOKCases] = { + std::vector ok_params = { TestParams{{2, 3}, 1, {1, 2, 3}}, TestParams{{2, 3}, -3, {1, 2, 3}}, TestParams{{2, 3}, 3, {2, 3, 1}}, TestParams{{2, 3}, -1, {2, 3, 1}}, TestParams{{2, 3}, 2, {2, 1, 3}}, TestParams{{2, 3}, -2, {2, 1, 3}}, TestParams{{6}, 1, {1, 6}}, TestParams{{6}, -1, {6, 1}}, }; - for (int i = 0; i < kExpandDimsOKCases; ++i) { + for (int i = 0; i < ok_params.size(); ++i) { Reset(); AddTestTensor("input", ok_params[i].input_dims); AddTestWeights("weights", {1}, {ok_params[i].axis}); @@ -2931,8 +2927,7 @@ TEST_F(OpConverterTest, ConvertSqueeze) { }; // Ok. - const int kSqueezeOKCases = 10; - TestParams ok_params[kSqueezeOKCases] = { + std::vector ok_params = { TestParams{{1, 2, 3}, {1}, {2, 3}}, TestParams{{1, 2, 3}, {-3}, {2, 3}}, TestParams{{2, 3, 1}, {3}, {2, 3}}, @@ -2944,7 +2939,7 @@ TEST_F(OpConverterTest, ConvertSqueeze) { TestParams{{1, 6}, {1}, {6}}, TestParams{{6, 1}, {2}, {6}}, }; - for (int i = 0; i < kSqueezeOKCases; ++i) { + for (int i = 0; i < ok_params.size(); ++i) { Reset(); NodeDef node_def = get_squeeze_nodedef(ok_params[i].axis); AddTestTensor("input", ok_params[i].input_dims); @@ -3114,13 +3109,8 @@ TEST_F(OpConverterTest, ConvertStridedSlice) { // Same input is used for all tests. const std::vector ok_input = {1, 2, 3, 4, 5, 6}; -#if IS_TRT_VERSION_GE(5, 1, 3, 1) - const int kStridedSliceOKCases = 31; -#else - const int kStridedSliceOKCases = 27; -#endif // Ok. - TestParams ok_params[kStridedSliceOKCases] = { + std::vector ok_params = { // 2D Crop. 
TestParams{ /*input_dims=*/{1, 2, 3}, @@ -3484,6 +3474,7 @@ TEST_F(OpConverterTest, ConvertStridedSlice) { /*expected_output_dims=*/{1, 2, 1}, /*expected_output=*/{2, 5}, }, +#if IS_TRT_VERSION_GE(5, 1, 3, 1) TestParams{ /*input_dims=*/{1, 2, 3}, /*begin=*/{0, 0, 0, 0, 1}, @@ -3537,9 +3528,10 @@ TEST_F(OpConverterTest, ConvertStridedSlice) { /*expected_output_dims=*/{}, /*expected_output=*/{1}, }, +#endif // IS_TRT_VERSION_GE(5, 1, 3, 1) }; - for (int i = 0; i < kStridedSliceOKCases; i++) { + for (int i = 0; i < ok_params.size(); i++) { Reset(); NodeDef node_def = get_strided_slice_nodedef( ok_params[i].begin_mask, ok_params[i].end_mask, @@ -3672,8 +3664,7 @@ TEST_F(OpConverterTest, ConvertSlice) { }; // Ok. - const int kSliceOKCases = 5; - TestParams ok_params[kSliceOKCases] = { + std::vector ok_params = { TestParams{{1, 2, 3}, {0, 0, 0, 0}, {-1, -1, -1, -1}, @@ -3687,7 +3678,7 @@ TEST_F(OpConverterTest, ConvertSlice) { TestParams{{6}, {0, 1}, {-1, 3}, {3}, {2, 3, 4}}, }; - for (int i = 0; i < kSliceOKCases; i++) { + for (int i = 0; i < ok_params.size(); i++) { Reset(); NodeDef node_def = get_slice_nodedef(); AddTestTensor("input", ok_params[i].input_dims); @@ -3856,8 +3847,7 @@ TEST_F(OpConverterTest, ConvertConv2D) { }; // Ok. - const int kConv2DOKCases = 7; - TestParams ok_params[kConv2DOKCases] = { + std::vector ok_params = { // Basic TestParams{/*input_dims=*/{1, 2, 3}, /*input=*/{0, 1, 2, 3, 3, 4}, @@ -3942,9 +3932,34 @@ TEST_F(OpConverterTest, ConvertConv2D) { /*is_conv2d_backprop_input=*/true, /*expected_output_dims=*/{1, 2, 4}, /*expected_output=*/{0, 0, -1, 1, -2, 2, -3, 3}}, + // Transpose Strided NHWC + TestParams{/*input_dims=*/{2, 2, 1}, + /*input=*/{0, 1, 2, 3}, + /*filter_dims=*/{1, 2, 1, 1}, + /*filter=*/{-1, 1}, + /*strides=*/{1, 1, 2, 1}, + /*padding=*/"SAME", + /*data_format=*/"NHWC", + /*dilations=*/{1, 1, 1, 1}, + /*is_conv2d_backprop_input=*/true, + /*expected_output_dims=*/{2, 4, 1}, + /*expected_output=*/{0, 0, -1, 1, -2, 2, -3, 3}}, + // Transpose Strided NHWC with VALID padding + TestParams{/*input_dims=*/{3, 1, 1}, + /*input=*/{0, 1, 2}, + /*filter_dims=*/{2, 1, 1, 1}, + /*filter=*/{-1, 1}, + /*strides=*/{1, 2, 1, 1}, + /*padding=*/"VALID", + /*data_format=*/"NHWC", + /*dilations=*/{1, 1, 1, 1}, + /*is_conv2d_backprop_input=*/true, + /*expected_output_dims=*/{7, 1, 1}, + /*expected_output=*/{0, 0, -1, 1, -2, 2, 0}}, + }; - for (int i = 0; i < kConv2DOKCases; i++) { + for (int i = 0; i < ok_params.size(); i++) { Reset(); NodeDef node_def = get_conv2d_nodedef( ok_params[i].strides, ok_params[i].padding, ok_params[i].data_format, @@ -3953,10 +3968,10 @@ TEST_F(OpConverterTest, ConvertConv2D) { AddTestWeights("weights", ok_params[i].filter_dims, ok_params[i].filter); if (ok_params[i].is_conv2d_backprop_input) { - AddTestWeights( - "input_sizes", - {static_cast(ok_params[i].expected_output.size())}, - ok_params[i].expected_output); + std::vector tf_input_sizes = ok_params[i].expected_output_dims; + tf_input_sizes.insert(tf_input_sizes.begin(), 1); // Add batch dimension. 
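      // Note (illustrative): TF's Conv2DBackpropInput takes input_sizes as
      // the full 4-D output shape; the test params store expected_output_dims
      // without the batch dimension, so a leading batch of 1 is prepended
      // before the shape is passed as the 4-element input_sizes weight below.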
+ QCHECK_EQ(4, tf_input_sizes.size()); + AddTestWeights("input_sizes", {4}, tf_input_sizes); } RunValidationAndConversion(node_def); TRT_TensorOrWeights output; @@ -4141,8 +4156,7 @@ TEST_F(OpConverterTest, ConvertConv3D) { }; // Start here - const int kConv3DOKCases = 8; - TestParams ok_params[kConv3DOKCases] = { + std::vector ok_params = { // Basic - just 1x1 conv - input = output TestParams{ /*input_dims=*/{1, 3, 3, 3}, // CDHW @@ -4277,7 +4291,7 @@ TEST_F(OpConverterTest, ConvertConv3D) { }; - for (int i = 0; i < kConv3DOKCases; i++) { + for (int i = 0; i < ok_params.size(); i++) { Reset(); NodeDef node_def = get_conv3d_nodedef( ok_params[i].strides, ok_params[i].padding, ok_params[i].data_format, @@ -4361,8 +4375,7 @@ TEST_F(OpConverterTest, ConvertPool3D) { const std::vector common_array{-4, 2, 15, 3, 6, -3, 22, 1, 88, 56, 36, 1, 1, 105, 1, 16, -28, 1, 42, 9, 3, 1, 7, 1, 11, 61, 5}; - const int kPool3DOKCases = 10; - TestParams ok_params[kPool3DOKCases] = { + std::vector ok_params = { // Basic - just 1x1 max pooling - input = output TestParams{/*input_dims=*/{1, 3, 3, 3}, /*input=*/common_array, @@ -4472,7 +4485,7 @@ TEST_F(OpConverterTest, ConvertPool3D) { // the corners }}; - for (int i = 0; i < kPool3DOKCases; i++) { + for (int i = 0; i < ok_params.size(); i++) { Reset(); NodeDef node_def = get_pool3d_nodedef( ok_params[i].ksize, ok_params[i].strides, ok_params[i].padding, @@ -4572,10 +4585,9 @@ void TestConvertGather(OpConverterTest* test) { }; // Input is the same {1, 2, 3, 4, 5, 6} for all cases. - const int kGatherOKCases = 11; const std::vector params_input = {CType(1), CType(2), CType(3), CType(4), CType(5), CType(6)}; - TestParams ok_params[kGatherOKCases] = { + std::vector ok_params = { // Vector indices, and output rank is rank(params). TestParams{ /*params_shape=*/{1, 1, 2, 3}, @@ -4680,7 +4692,7 @@ void TestConvertGather(OpConverterTest* test) { }; // Ok. - for (int i = 0; i < kGatherOKCases; i++) { + for (int i = 0; i < ok_params.size(); i++) { test->Reset(); const auto& params_shape = ok_params[i].params_shape; if (ok_params[i].params_is_tensor) { @@ -4993,8 +5005,7 @@ void TestConvertConcat(OpConverterTest* test) { InitTestVector(6, /*start_value=*/CType(6))}; // TODO(hinsu): Use std::vector instead of an array to avoid use of explicit // size. - const int kConcatOKCases = 4; - TestParams ok_params[kConcatOKCases] = { + std::vector ok_params = { { /*input_shapes=*/{{1, 2, 3}, {1, 2, 3}}, /*input_values=*/common_input, @@ -5034,7 +5045,7 @@ void TestConvertConcat(OpConverterTest* test) { }, }; - for (int i = 0; i < kConcatOKCases; ++i) { + for (int i = 0; i < ok_params.size(); ++i) { test->Reset(); const int num_inputs = ok_params[i].input_shapes.size(); EXPECT_EQ(num_inputs, ok_params[i].input_values.size()); @@ -5167,8 +5178,7 @@ void TestConvertSplit(OpConverterTest* test) { }; const std::vector common_input = InitTestVector(6); - const int kSplitOKCases = 4; - TestParams ok_params[kSplitOKCases] = { + std::vector ok_params = { // Identity (num_split = 1) {/*input_shape=*/{1, 2, 3}, /*value=*/common_input, /*axis=*/1, /*num_split=*/1, /*expected_output_dims=*/{1, 2, 3}, @@ -5201,7 +5211,7 @@ void TestConvertSplit(OpConverterTest* test) { {InitTestVector(3), InitTestVector(3, CType(3))}}, }; - for (int i = 0; i < kSplitOKCases; ++i) { + for (int i = 0; i < ok_params.size(); ++i) { test->Reset(); NodeDef node_def = get_split_nodedef(dtype, ok_params[i].num_split); // Create inputs. 
@@ -5343,8 +5353,7 @@ void TestConvertUnpack(OpConverterTest* test) { }; const std::vector common_input = InitTestVector(6); - const int kUnpackOKCases = 4; - TestParams ok_params[kUnpackOKCases] = { + std::vector ok_params = { {/*input_shape=*/{1, 2, 3}, /*value=*/common_input, /*axis=*/1, /*num=*/1, /*expected_output_dims=*/{2, 3}, /*expected_outputs=*/{InitTestVector(6)}}, @@ -5381,7 +5390,7 @@ void TestConvertUnpack(OpConverterTest* test) { {CType(5)}}}, }; - for (int i = 0; i < kUnpackOKCases; ++i) { + for (int i = 0; i < ok_params.size(); ++i) { test->Reset(); NodeDef node_def = get_unpack_nodedef(dtype, ok_params[i].num, ok_params[i].axis); diff --git a/tensorflow/compiler/tf2tensorrt/convert/trt_optimization_pass.cc b/tensorflow/compiler/tf2tensorrt/convert/trt_optimization_pass.cc index 40fd3a7b65f..757ddd159c9 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/trt_optimization_pass.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/trt_optimization_pass.cc @@ -71,7 +71,7 @@ Status TRTOptimizationPass::Init( trt_logger_name_ = params.at("trt_logger").s(); } if (params.count("use_implicit_batch")) { - use_implicit_batch = params.at("use_implicit_batch").b(); + use_implicit_batch_ = params.at("use_implicit_batch").b(); } return Status::OK(); } @@ -264,7 +264,7 @@ Status TRTOptimizationPass::Optimize(grappler::Cluster* cluster, cp.is_dyn_op = is_dynamic_op_; cp.max_cached_engines = max_cached_batches_; cp.use_calibration = use_calibration_; - cp.use_implicit_batch = use_implicit_batch; + cp.use_implicit_batch = use_implicit_batch_; auto status = ConvertAfterShapes(cp); VLOG(1) << "Returning from " << name_; return status; diff --git a/tensorflow/compiler/tf2tensorrt/convert/trt_optimization_pass.h b/tensorflow/compiler/tf2tensorrt/convert/trt_optimization_pass.h index cc17b3409e6..3ce0d09b7c0 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/trt_optimization_pass.h +++ b/tensorflow/compiler/tf2tensorrt/convert/trt_optimization_pass.h @@ -42,7 +42,7 @@ class TRTOptimizationPass : public grappler::CustomGraphOptimizer { max_cached_batches_(1), max_workspace_size_bytes_(256LL << 20), use_calibration_(true), - use_implicit_batch(true) { + use_implicit_batch_(true) { VLOG(1) << "Constructing " << name_; } @@ -74,7 +74,7 @@ class TRTOptimizationPass : public grappler::CustomGraphOptimizer { int max_cached_batches_; int64_t max_workspace_size_bytes_; bool use_calibration_; - bool use_implicit_batch; + bool use_implicit_batch_; }; } // namespace convert diff --git a/tensorflow/compiler/tf2tensorrt/convert/utils.cc b/tensorflow/compiler/tf2tensorrt/convert/utils.cc index ca21c193d63..d142bc58bef 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/utils.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/utils.cc @@ -17,6 +17,8 @@ limitations under the License. 
#include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/lib/strings/strcat.h" namespace tensorflow { namespace tensorrt { @@ -51,5 +53,101 @@ Status TrtPrecisionModeFromName(const string& name, TrtPrecisionMode* mode) { return Status::OK(); } +#if GOOGLE_CUDA && GOOGLE_TENSORRT +using absl::StrAppend; +using absl::StrCat; + +string DebugString(const nvinfer1::DimensionType type) { + switch (type) { + case nvinfer1::DimensionType::kSPATIAL: + return "kSPATIAL"; + case nvinfer1::DimensionType::kCHANNEL: + return "kCHANNEL"; + case nvinfer1::DimensionType::kINDEX: + return "kINDEX"; + case nvinfer1::DimensionType::kSEQUENCE: + return "kSEQUENCE"; + default: + return StrCat(static_cast(type), "=unknown"); + } +} + +string DebugString(const nvinfer1::Dims& dims) { + string out = StrCat("nvinfer1::Dims(nbDims=", dims.nbDims, ", d="); + for (int i = 0; i < dims.nbDims; ++i) { + StrAppend(&out, dims.d[i]); + if (VLOG_IS_ON(2)) { + StrAppend(&out, "[", DebugString(dims.type[i]), "],"); + } else { + StrAppend(&out, ","); + } + } + StrAppend(&out, ")"); + return out; +} + +string DebugString(const nvinfer1::DataType trt_dtype) { + switch (trt_dtype) { + case nvinfer1::DataType::kFLOAT: + return "kFLOAT"; + case nvinfer1::DataType::kHALF: + return "kHALF"; + case nvinfer1::DataType::kINT8: + return "kINT8"; + case nvinfer1::DataType::kINT32: + return "kINT32"; + default: + return "Invalid TRT data type"; + } +} + +string DebugString(const nvinfer1::Permutation& permutation, int len) { + string out = "nvinfer1::Permutation("; + for (int i = 0; i < len; ++i) { + StrAppend(&out, permutation.order[i], ","); + } + StrAppend(&out, ")"); + return out; +} + +string DebugString(const nvinfer1::ITensor& tensor) { + return StrCat("nvinfer1::ITensor(@", reinterpret_cast(&tensor), + ", name=", tensor.getName(), + ", dtype=", DebugString(tensor.getType()), + ", dims=", DebugString(tensor.getDimensions()), ")"); +} + +#endif + +string GetLinkedTensorRTVersion() { + int major, minor, patch; +#if GOOGLE_CUDA && GOOGLE_TENSORRT + major = NV_TENSORRT_MAJOR; + minor = NV_TENSORRT_MINOR; + patch = NV_TENSORRT_PATCH; +#else + major = 0; + minor = 0; + patch = 0; +#endif + return absl::StrCat(major, ".", minor, ".", patch); +} + +string GetLoadedTensorRTVersion() { + int major, minor, patch; +#if GOOGLE_CUDA && GOOGLE_TENSORRT + int ver = getInferLibVersion(); + major = ver / 1000; + ver = ver - major * 1000; + minor = ver / 100; + patch = ver - minor * 100; +#else + major = 0; + minor = 0; + patch = 0; +#endif + return absl::StrCat(major, ".", minor, ".", patch); +} + } // namespace tensorrt } // namespace tensorflow diff --git a/tensorflow/compiler/tf2tensorrt/convert/utils.h b/tensorflow/compiler/tf2tensorrt/convert/utils.h index eb60829d31d..9015c24b1f4 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/utils.h +++ b/tensorflow/compiler/tf2tensorrt/convert/utils.h @@ -17,9 +17,15 @@ limitations under the License. 
#define TENSORFLOW_COMPILER_TF2TENSORRT_CONVERT_UTILS_H_ #include +#include +#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/lib/core/status.h" +#if GOOGLE_CUDA && GOOGLE_TENSORRT +#include "third_party/tensorrt/NvInfer.h" +#endif // GOOGLE_CUDA && GOOGLE_TENSORRT + namespace tensorflow { namespace tensorrt { @@ -45,6 +51,60 @@ Status TrtPrecisionModeToName(TrtPrecisionMode mode, string* name); Status TrtPrecisionModeFromName(const string& name, TrtPrecisionMode* mode); +// Define a hash function for vector because it is used as the key +// for the engine cache. +struct VectorTensorShapeHasher { + std::size_t operator()(const std::vector& key) const { + return std::hash()(TensorShapeUtils::ShapeListString(key)); + } +}; + +#if GOOGLE_CUDA && GOOGLE_TENSORRT + +#define IS_TRT_VERSION_GE(major, minor, patch, build) \ + ((NV_TENSORRT_MAJOR > major) || \ + (NV_TENSORRT_MAJOR == major && NV_TENSORRT_MINOR > minor) || \ + (NV_TENSORRT_MAJOR == major && NV_TENSORRT_MINOR == minor && \ + NV_TENSORRT_PATCH > patch) || \ + (NV_TENSORRT_MAJOR == major && NV_TENSORRT_MINOR == minor && \ + NV_TENSORRT_PATCH == patch && NV_TENSORRT_BUILD >= build)) + +string DebugString(const nvinfer1::DimensionType type); +string DebugString(const nvinfer1::Dims& dims); +string DebugString(const nvinfer1::DataType trt_dtype); +string DebugString(const nvinfer1::Permutation& permutation, int len); +string DebugString(const nvinfer1::ITensor& tensor); + +inline bool HasStaticShape(const nvinfer1::Dims& dims) { + if (dims.nbDims < 0) return false; + for (int d = 0; d < dims.nbDims; ++d) { + if (dims.d[d] < 0) return false; + } + return true; +} + +template +inline nvinfer1::Dims TensorShapeToTrtDims(const TensorShapeType& shape, + bool ignore_first_dim) { + nvinfer1::Dims trt_dims; + const int offset = (ignore_first_dim ? 1 : 0); + for (int i = offset; i < shape.dims(); i++) { + trt_dims.d[i - offset] = shape.dim_size(i); + } + trt_dims.nbDims = shape.dims() - offset; + return trt_dims; +} + +// Return a string that includes compile time +// TensorRT library version information {Maj, Min, Patch}. +string GetLinkedTensorRTVersion(); + +// Return a string that includes runtime time +// TensorRT library version information {Maj, Min, Patch}. 
+string GetLoadedTensorRTVersion(); + +#endif // GOOGLE_CUDA && GOOGLE_TENSORRT + } // namespace tensorrt } // namespace tensorflow diff --git a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc index ca591460c65..c14de3a6736 100644 --- a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc @@ -529,6 +529,25 @@ bool TRTEngineOp::ExecuteTrtEngine(OpKernelContext* ctx, EngineContext* engine_context) { VLOG(1) << "Executing TRT engine: " << name(); auto& cuda_engine = engine_context->cuda_engine; + + if (VLOG_IS_ON(2)) { +#if IS_TRT_VERSION_GE(6, 0, 0, 0) + VLOG(2) << " Network name: " << cuda_engine->getName(); +#endif // #if IS_TRT_VERSION_GE(6, 0, 0, 0) + VLOG(2) << " Activation size: " << cuda_engine->getDeviceMemorySize() + << " bytes"; + VLOG(2) << " Workspace size: " << cuda_engine->getWorkspaceSize() + << " bytes"; + VLOG(2) << " Datatype of " << cuda_engine->getNbBindings() + << " inputs/outputs"; + string binding_types = ""; + for (int i = 0; i < cuda_engine->getNbBindings(); i++) { + binding_types += " " + string(cuda_engine->getBindingName(i)) + ": " + + DebugString(cuda_engine->getBindingDataType(i)) + "\n"; + } + VLOG(2) << binding_types; + } + const bool kRetry = true; // All inputs must have the same batch size, so just get it from the first // input. @@ -694,6 +713,8 @@ StatusOr TRTEngineOp::GetEngine( // single element containing the only engine. if (static_engine_) { if (cache.size()) { + // TODO(laigd): need a better shape compatibility check for the case where + // implicit batch is disabled. if (!use_implicit_batch_ || AreShapesCompatible(input_shapes, cache.begin()->first)) { return cache.begin()->second.get(); diff --git a/tensorflow/compiler/tf2tensorrt/utils/trt_lru_cache.h b/tensorflow/compiler/tf2tensorrt/utils/trt_lru_cache.h index 8d603ac4d55..808b689127e 100644 --- a/tensorflow/compiler/tf2tensorrt/utils/trt_lru_cache.h +++ b/tensorflow/compiler/tf2tensorrt/utils/trt_lru_cache.h @@ -114,14 +114,6 @@ class LRUCache { } }; -// Define a hash function for vector because it is used as the key -// for the engine cache. 
-struct VectorTensorShapeHasher { - std::size_t operator()(const std::vector& key) const { - return std::hash()(TensorShapeUtils::ShapeListString(key)); - } -}; - #if GOOGLE_CUDA #if GOOGLE_TENSORRT diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD index c509afbc33a..afe96952358 100644 --- a/tensorflow/compiler/tf2xla/BUILD +++ b/tensorflow/compiler/tf2xla/BUILD @@ -627,7 +627,7 @@ cc_library( "//tensorflow/compiler/mlir/tensorflow:mlir_roundtrip_flags", "//tensorflow/core:core_cpu_lib", "//tensorflow/core:session_options", - "@llvm//:support", + "@llvm-project//llvm:support", ], alwayslink = 1, ) diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD index 242448e443e..dbc8397441f 100644 --- a/tensorflow/compiler/tf2xla/kernels/BUILD +++ b/tensorflow/compiler/tf2xla/kernels/BUILD @@ -48,6 +48,7 @@ tf_kernel_library( "function_ops.cc", "gather_op.cc", "gather_op_helpers.h", + "gather_scatter_ops.cc", "identity_op.cc", "image_ops.cc", "image_resize_ops.cc", diff --git a/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc b/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc index 4f79ce109fb..dda0d79337a 100644 --- a/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc +++ b/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc @@ -512,22 +512,26 @@ xla::StatusOr MakeXlaBackpropFilterConvOp( filter_in_depth = filter_shape.dimensions(attrs.num_spatial_dims), feature_group_count = in_depth / filter_in_depth; + // In the case of depthwise convolutions, the computation can be done by the + // batch_group_count parameter. + bool use_batch_group_count = in_depth > 1 && in_depth == filter_in_depth && + (feature_group_count != 1 || attrs.depthwise); + + if (use_batch_group_count) { + feature_group_count = 1; + } + // The activations (inputs) form the LHS of the convolution. // Activations have shape: [batch, in_rows, in_cols, ..., in_depth] // For the gradient computation, we need to: // 1. In the case of group convolution, move the num_groups dimension before // the batch dimension // 2. Swap the roles of the batch and feature dimensions. - if (feature_group_count != 1 && !attrs.depthwise) { + if (!use_batch_group_count && feature_group_count != 1 && !attrs.depthwise) { activations = TransposeInputForGroupConvolutionBackpropFilter( activations, input_shape, feature_group_count, n_dim, c_dim); } - // In the case of depthwise convolution with no multiplier, - // the computation can be done by the batch_group_count parameter. - bool use_batch_group_count = - filter_tensor_shape.dim_size(num_dims - 1) == 1 && attrs.depthwise; - std::vector> padding(attrs.num_spatial_dims); std::vector rhs_dilation(attrs.num_spatial_dims); std::vector window_strides(attrs.num_spatial_dims); diff --git a/tensorflow/compiler/tf2xla/kernels/gather_scatter_ops.cc b/tensorflow/compiler/tf2xla/kernels/gather_scatter_ops.cc new file mode 100644 index 00000000000..19aa85f9d42 --- /dev/null +++ b/tensorflow/compiler/tf2xla/kernels/gather_scatter_ops.cc @@ -0,0 +1,102 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" +#include "tensorflow/compiler/xla/client/xla_builder.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/framework/attr_value.pb.h" +#include "tensorflow/core/framework/op_kernel.h" + +namespace tensorflow { +namespace { + +class GatherOp : public XlaOpKernel { + public: + explicit GatherOp(OpKernelConstruction* context) : XlaOpKernel(context) { + string dnums_attr; + OP_REQUIRES_OK(context, context->GetAttr("dimension_numbers", &dnums_attr)); + OP_REQUIRES( + context, dnums_.ParsePartialFromString(dnums_attr), + errors::InvalidArgument("Error parsing gather dimension numbers")); + OP_REQUIRES_OK( + context, context->GetAttr("indices_are_sorted", &indices_are_sorted_)); + } + + void Compile(XlaOpKernelContext* ctx) override { + std::vector slice_sizes; + OP_REQUIRES_OK(ctx, + ctx->ConstantInputAsIntVector("slice_sizes", &slice_sizes)); + xla::XlaOp result = + xla::Gather(ctx->Input("operand"), ctx->Input("start_indices"), dnums_, + slice_sizes, indices_are_sorted_); + ctx->SetOutput(0, result); + } + + private: + xla::GatherDimensionNumbers dnums_; + bool indices_are_sorted_; +}; + +REGISTER_XLA_OP(Name("XlaGather"), GatherOp); + +class ScatterOp : public XlaOpKernel { + public: + explicit ScatterOp(OpKernelConstruction* context) : XlaOpKernel(context) { + OP_REQUIRES_OK( + context, context->GetAttr("update_computation", &update_computation_)); + string dnums_attr; + OP_REQUIRES_OK(context, context->GetAttr("dimension_numbers", &dnums_attr)); + OP_REQUIRES( + context, dnums_.ParsePartialFromString(dnums_attr), + errors::InvalidArgument("Error parsing scatter dimension numbers")); + OP_REQUIRES_OK( + context, context->GetAttr("indices_are_sorted", &indices_are_sorted_)); + } + + void Compile(XlaOpKernelContext* ctx) override { + const DataType dtype = ctx->input_type(0); + + XlaCompiler::Argument update_computation_arg; + update_computation_arg.kind = XlaCompiler::Argument::kParameter; + update_computation_arg.type = dtype; + update_computation_arg.shape = TensorShape(); + + XlaCompiler::CompileOptions compile_options; + compile_options.use_tuple_arg = false; + compile_options.always_return_tuple = false; + compile_options.is_entry_computation = false; + XlaCompiler::CompilationResult update_computation; + OP_REQUIRES_OK(ctx, ctx->compiler()->CompileFunction( + compile_options, *update_computation_, + {update_computation_arg, update_computation_arg}, + &update_computation)); + + xla::XlaOp result = + xla::Scatter(ctx->Input("operand"), ctx->Input("scatter_indices"), + ctx->Input("updates"), *update_computation.computation, + dnums_, indices_are_sorted_); + ctx->SetOutput(0, result); + } + + private: + const NameAttrList* update_computation_; + xla::ScatterDimensionNumbers dnums_; + bool indices_are_sorted_; +}; + +REGISTER_XLA_OP(Name("XlaScatter"), ScatterOp); + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/kernels/unary_ops.cc b/tensorflow/compiler/tf2xla/kernels/unary_ops.cc index 
e6076907980..83a894e91fe 100644 --- a/tensorflow/compiler/tf2xla/kernels/unary_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/unary_ops.cc @@ -111,6 +111,11 @@ XLAJIT_MAKE_UNARY(Real, xla::Real(x)); XLAJIT_MAKE_UNARY(Imag, xla::Imag(x)); XLAJIT_MAKE_UNARY(Erf, xla::Erf(x)); XLAJIT_MAKE_UNARY(Erfc, xla::Erfc(x)); +XLAJIT_MAKE_UNARY(Erfinv, xla::ErfInv(x)); +// ndtri = sqrt(2) * erfinv(2 * x - 1) +XLAJIT_MAKE_UNARY(Ndtri, xla::ScalarLike(x, std::sqrt(2.0)) * + xla::ErfInv(xla::ScalarLike(x, 2.0) * x - + xla::ScalarLike(x, 1.0))); XLAJIT_MAKE_UNARY(Lgamma, xla::Lgamma(x)); XLAJIT_MAKE_UNARY(Digamma, xla::Digamma(x)); XLAJIT_MAKE_UNARY(BesselI0e, xla::BesselI0e(x)); diff --git a/tensorflow/compiler/tf2xla/ops/xla_ops.cc b/tensorflow/compiler/tf2xla/ops/xla_ops.cc index 33b740a706c..6b71cca9c2a 100644 --- a/tensorflow/compiler/tf2xla/ops/xla_ops.cc +++ b/tensorflow/compiler/tf2xla/ops/xla_ops.cc @@ -665,5 +665,50 @@ REGISTER_OP("XlaReplicaId") }) .Doc("Replica ID."); +REGISTER_OP("XlaGather") + .Input("operand: T") + .Input("start_indices: Tindices") + .Input("slice_sizes: Tindices") + .Attr("dimension_numbers: string") + .Attr("indices_are_sorted: bool") + .Attr("T: numbertype") + .Attr("Tindices: {int32, int64}") + .Output("output: T") + .SetShapeFn(UnchangedRank) + .Doc(R"doc( +Wraps the XLA Gather operator documented at + https://www.tensorflow.org/xla/operation_semantics#gather +operand: The array we're gathering from. +start_indices: Array containing the starting indices of the slices we gather. +dimension_numbers: A serialized xla::GatherDimensionNumbers proto. +slice_sizes: slice_sizes[i] is the bounds for the slice on dimension i. +indices_are_sorted: Boolean indicating if the indices are sorted. +)doc"); + +REGISTER_OP("XlaScatter") + .Input("operand: T") + .Input("scatter_indices: Tindices") + .Input("updates: T") + .Attr("update_computation: func") + .Attr("dimension_numbers: string") + .Attr("indices_are_sorted: bool") + .Attr("T: numbertype") + .Attr("Tindices: {int32, int64}") + .Output("output: T") + .SetShapeFn(UnchangedRank) + .Doc(R"doc( +Wraps the XLA Scatter operator documented at + https://www.tensorflow.org/xla/operation_semantics#scatter. + +operand: Array to be scattered into. +scatter_indices: Array containing the starting indices of the slices that must + be scattered to. +updates: Array containing the values that must be used for scattering. +update_computation: Computation to be used for combining the existing values in + the input array and the updates during scatter. +dimension_numbers: A serialized xla::ScatterDimensionNumbers proto. +indices_are_sorted: Boolean indicating if the indices are sorted. 
+)doc"); + } // namespace } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/python/xla.py b/tensorflow/compiler/tf2xla/python/xla.py index 24f1e7b41ec..bf258482e56 100644 --- a/tensorflow/compiler/tf2xla/python/xla.py +++ b/tensorflow/compiler/tf2xla/python/xla.py @@ -81,7 +81,8 @@ ceil = _unary_op(math_ops.ceil) digamma = _unary_op(math_ops.digamma) erf = _unary_op(math_ops.erf) erfc = _unary_op(math_ops.erfc) -# TODO(phawkins): implement erfinv +erfinv = _unary_op(math_ops.erfinv) +ndtri = _unary_op(math_ops.ndtri) exp = _unary_op(math_ops.exp) expm1 = _unary_op(math_ops.expm1) floor = _unary_op(math_ops.floor) @@ -415,3 +416,27 @@ sort = gen_xla_ops.xla_sort key_value_sort = gen_xla_ops.xla_key_value_sort while_loop = gen_xla_ops.xla_while dequantize = gen_xla_ops.xla_dequantize + + +def gather(operand, start_indices, dimension_numbers, slice_sizes, + indices_are_sorted=False, name=None): + return gen_xla_ops.xla_gather( + operand, + start_indices, + slice_sizes=slice_sizes, + dimension_numbers=dimension_numbers.SerializeToString(), + indices_are_sorted=indices_are_sorted, + name=name) + + +def scatter(operand, scatter_indices, updates, update_computation, + dimension_numbers, indices_are_sorted=False, name=None): + return gen_xla_ops.xla_scatter( + operand, + scatter_indices, + updates, + update_computation=update_computation, + dimension_numbers=dimension_numbers.SerializeToString(), + indices_are_sorted=indices_are_sorted, + name=name) + diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD index 4e2866865a2..3a430c36a82 100644 --- a/tensorflow/compiler/xla/BUILD +++ b/tensorflow/compiler/xla/BUILD @@ -417,7 +417,6 @@ cc_library( ":array3d", ":array4d", ":shape_util", - ":sparse_index_array", ":status_macros", ":types", ":util", @@ -463,7 +462,6 @@ cc_library( ":array4d", ":literal", ":shape_util", - ":sparse_index_array", ":status_macros", ":types", ":util", @@ -840,29 +838,6 @@ tf_cc_test( ], ) -cc_library( - name = "sparse_index_array", - srcs = ["sparse_index_array.cc"], - hdrs = ["sparse_index_array.h"], - deps = [ - ":array2d", - ":shape_util", - ":xla_data_proto_cc", - "@com_google_absl//absl/container:inlined_vector", - "@com_google_absl//absl/types:span", - ], -) - -tf_cc_test( - name = "sparse_index_array_test", - srcs = ["sparse_index_array_test.cc"], - deps = [ - ":sparse_index_array", - ":test", - "//tensorflow/core:test_main", - ], -) - cc_library( name = "parse_flags_from_env", srcs = ["parse_flags_from_env.cc"], diff --git a/tensorflow/compiler/xla/client/BUILD b/tensorflow/compiler/xla/client/BUILD index fd31fb17bba..47fe026385e 100644 --- a/tensorflow/compiler/xla/client/BUILD +++ b/tensorflow/compiler/xla/client/BUILD @@ -129,7 +129,7 @@ cc_library( "//tensorflow/stream_executor:device_memory_allocator", "@com_google_absl//absl/memory", "@com_google_absl//absl/types:span", - "@llvm//:support", + "@llvm-project//llvm:support", ], ) @@ -147,7 +147,7 @@ cc_library( "//tensorflow/compiler/xla/service:compiler", "//tensorflow/core:stream_executor_no_cuda", "@com_google_absl//absl/memory", - "@llvm//:support", + "@llvm-project//llvm:support", ], ) @@ -253,6 +253,7 @@ tf_cc_test( "//tensorflow/compiler/xla:xla_data_proto_cc", "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:hlo_matchers", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:test", ], ) diff --git a/tensorflow/compiler/xla/client/lib/math.cc b/tensorflow/compiler/xla/client/lib/math.cc index 8c85482c8f8..9153ac9e524 
100644
--- a/tensorflow/compiler/xla/client/lib/math.cc
+++ b/tensorflow/compiler/xla/client/lib/math.cc
@@ -15,9 +15,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/client/lib/math.h"
-// This macro is required to make MSVC defines math constants in math.h
-#define _USE_MATH_DEFINES
-#include <math.h>
+#include <cmath>
 #include "tensorflow/compiler/xla/client/lib/arithmetic.h"
 #include "tensorflow/compiler/xla/client/lib/constants.h"
diff --git a/tensorflow/compiler/xla/client/lib/tridiagonal.cc b/tensorflow/compiler/xla/client/lib/tridiagonal.cc
index d2ea6d57069..13cc3630137 100644
--- a/tensorflow/compiler/xla/client/lib/tridiagonal.cc
+++ b/tensorflow/compiler/xla/client/lib/tridiagonal.cc
@@ -36,6 +36,8 @@ namespace {
 struct TridiagonalSystemShape {
   const int64 rank;
   const int64 num_equations;
+  TridiagonalSystemShape(int64 rk, int64 num_eqs)
+      : rank(rk), num_equations(num_eqs) {}
 };
 Status CheckSecondToLastDimension(const Shape& op_shape, int64 rank,
@@ -109,9 +111,7 @@ StatusOr<TridiagonalSystemShape> CheckSystemAndReturnShape(XlaOp lower_diagonal,
   TF_RETURN_IF_ERROR(CheckSecondToLastDimension(upper_diagonal_shape, rank, 1,
                                                 "upper diagonal"));
-  TridiagonalSystemShape result = {.rank = rank,
-                                   .num_equations = num_equations};
-  return result;
+  return TridiagonalSystemShape(rank, num_equations);
 }
 XlaOp Coefficient(XlaOp operand, int64 i) {
diff --git a/tensorflow/compiler/xla/g3doc/index.md b/tensorflow/compiler/xla/g3doc/index.md
index 39715fbe7a9..38c6672685d 100644
--- a/tensorflow/compiler/xla/g3doc/index.md
+++ b/tensorflow/compiler/xla/g3doc/index.md
@@ -81,32 +81,19 @@ For a detailed usage example, see the
 ### Explicit compilation
 Explicit compilation API offers a more fine-grained control for choosing which
-functions should be compiled with XLA. However, it requires restructuring source
-code, as not all TensorFlow operations can be represented in XLA. That is, using
-explicit compilation on API on functions which can not be represented in XLA
-results in an exception.
+functions should be compiled with XLA. However, it might require restructuring
+of the source code, as not all TensorFlow operations can be represented in XLA.
-#### TF2: Use `@tf.function(experimental_compile=True)`
+Note: Using the explicit compilation API on functions which cannot be
+represented in XLA results in an exception.
 Optimizing sections of the program using
 [`tf.function`](https://www.tensorflow.org/api_docs/python/tf/function) is a
-standard approach for
-[improving performance](https://www.tensorflow.org/tutorials/customization/performance)
-of TF2 programs. You can enable compilation with XLA by setting the
-`experimental_compile` argument of `tf.function` to `True`.
-
-Note: `experimental_compile` only works in
-[eager](https://www.tensorflow.org/guide/eager) mode.
-
-#### TF1: Use `xla.compile`
-
-If you are using TF1, you can use the `xla.compile` API for explicit compilation
-using XLA. See the [tutorial colab](./tutorials/xla_compile.ipynb) for usage
-examples.
-
-Note: Gradient computation of graph in `xla.compile()` is prohibited because it
-can cause performance degradation. To avoid this issue, move gradient
-computation inside `xla.compile()`.
+standard approach for [improving
+performance](https://www.tensorflow.org/tutorials/customization/performance) of
+TF2 programs. You can enable compilation with XLA by setting the
+`experimental_compile` argument of `tf.function` to `True`. See the [tutorial
+colab](./tutorials/experimental_compile.ipynb) for usage examples.
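As a quick illustration of the workflow the revised guide describes, here is a minimal sketch (assuming TF 2.x; the function name and shapes are made up for this example, not taken from the guide):

```
import tensorflow as tf

@tf.function(experimental_compile=True)  # ask XLA to compile this function
def dense_relu(x, w, b):
  # XLA can fuse the matmul, bias add, and relu into one optimized kernel.
  return tf.nn.relu(tf.matmul(x, w) + b)

x = tf.random.normal([4, 8])
w = tf.random.normal([8, 2])
b = tf.zeros([2])
print(dense_relu(x, w, b).shape)  # (4, 2)
```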
### AOT (Ahead-of-time) compilation for CPU with `tfcompile` diff --git a/tensorflow/compiler/xla/g3doc/operation_semantics.md b/tensorflow/compiler/xla/g3doc/operation_semantics.md index ee7b2b20928..0185bb4bb2f 100644 --- a/tensorflow/compiler/xla/g3doc/operation_semantics.md +++ b/tensorflow/compiler/xla/g3doc/operation_semantics.md @@ -2053,8 +2053,8 @@ window_strides, padding)` : : : as to have the same output shape : : : : as input if the stride is 1, or : : : : Padding\:\:kValid, which uses no : -: : : no padding and "stops" the : -: : : window once it no longer fits) : +: : : padding and "stops" the window : +: : : once it no longer fits) : Below code and figure shows an example of using `ReduceWindow`. Input is a matrix of size [4x6] and both window_dimensions and window_stride_dimensions are diff --git a/tensorflow/compiler/xla/g3doc/tutorials/experimental_compile.ipynb b/tensorflow/compiler/xla/g3doc/tutorials/experimental_compile.ipynb new file mode 100644 index 00000000000..c8c08fc3ffa --- /dev/null +++ b/tensorflow/compiler/xla/g3doc/tutorials/experimental_compile.ipynb @@ -0,0 +1,268 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Using XLA with tf.function", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "cells": [ + { + "metadata": { + "colab_type": "text", + "id": "f4TSNCvpENrW" + }, + "cell_type": "markdown", + "source": [ + "##### Copyright 2019 The TensorFlow Authors." + ] + }, + { + "metadata": { + "cellView": "form", + "colab_type": "code", + "id": "vamNSA0vEP-m", + "colab": {} + }, + "cell_type": "code", + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "e1oSi4lHFt3z" + }, + "source": [ + "# Using XLA via `tf.function` and `experimental_compile`" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sDy5lSBd4BDE", + "colab_type": "text" + }, + "source": [ + "In this colab, we train a TensorFlow model to classify the MNIST dataset, where the training function is compiled using XLA.\n", + "\n", + "We start by loading TensorFlow, with eager execution enabled." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "b7noD9NjFRL-" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + "
" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "45kUPj5ZFrRa" + }, + "source": [ + "import tensorflow as tf\n", + "\n", + "tf.enable_eager_execution()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "GZVNiRmTDV-5" + }, + "source": [ + "Then, we define some necessary constants and prepare the MNIST dataset." + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "f37TSEGvGX4_", + "colab": {} + }, + "source": [ + "# Size of each input image, 28 x 28 pixels\n", + "IMAGE_SIZE = 28 * 28\n", + "# Number of distinct number labels, [0..9]\n", + "NUM_CLASSES = 10\n", + "# Number of examples in each training batch (step)\n", + "TRAIN_BATCH_SIZE = 100\n", + "# Number of training steps to run\n", + "TRAIN_STEPS = 1000\n", + "\n", + "# Loads MNIST dataset.\n", + "train, test = tf.keras.datasets.mnist.load_data()\n", + "train_ds = tf.data.Dataset.from_tensor_slices(train).batch(TRAIN_BATCH_SIZE).repeat()\n", + "\n", + "# Casting from raw data to the required datatypes.\n", + "def cast(images, labels):\n", + " images = tf.cast(\n", + " tf.reshape(images, [-1, IMAGE_SIZE]), tf.float32)\n", + " labels = tf.cast(labels, tf.int64)\n", + " return (images, labels)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lv7I-u_82v1S", + "colab_type": "text" + }, + "source": [ + "Finally, we define the model and the optimizer. For the model, we shall use a single dense layer." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "7O2NcEfG206Q", + "colab_type": "code", + "colab": {} + }, + "source": [ + "layer = tf.keras.layers.Dense(NUM_CLASSES)\n", + "optimizer = tf.keras.optimizers.Adam()\n" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "x_ZehpZP-SfS" + }, + "source": [ + "# Define the training function\n", + "\n", + "In the training function, we get predicted labels using the layer defined above, and then we minimize the gradient of the loss using the optimizer. In order to compile the computation using XLA, we place it inside `tf.function` with `experimental_compile=True`." + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "ZbhJl_WvGa3g", + "colab": {} + }, + "source": [ + "@tf.function(experimental_compile=True)\n", + "def train_mnist(images, labels):\n", + " images, labels = cast(images, labels)\n", + "\n", + " with tf.GradientTape() as tape:\n", + " predicted_labels = layer(images)\n", + " loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(\n", + " logits=predicted_labels, labels=labels\n", + " ))\n", + " layer_variables = layer.trainable_variables\n", + " grads = tape.gradient(loss, layer_variables)\n", + " optimizer.apply_gradients(zip(grads, layer_variables))\n" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "EZD1m_n1DxAF" + }, + "source": [ + "# Train and test the model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gukC2Hol3sFZ", + "colab_type": "text" + }, + "source": [ + "Once we have defined the training function, we can define the model." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "qe28bAHNHUG2", + "colab": {} + }, + "source": [ + "for images, labels in train_ds:\n", + " if optimizer.iterations > TRAIN_STEPS:\n", + " break\n", + " train_mnist(images, labels)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "qgsKmz3n2UiW" + }, + "source": [ + "And, finally, check the accuracy:" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "_GxF6jTRHVuA" + }, + "source": [ + "images, labels = cast(test[0], test[1])\n", + "predicted_labels = layer(images)\n", + "correct_prediction = tf.equal(tf.argmax(predicted_labels, 1), labels)\n", + "accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))\n", + "print(\"Prediction accuracy after training: %s\" % accuracy)" + ], + "execution_count": 0 + } + ] +} diff --git a/tensorflow/compiler/xla/g3doc/tutorials/xla_compile.ipynb b/tensorflow/compiler/xla/g3doc/tutorials/xla_compile.ipynb deleted file mode 100644 index 715585db337..00000000000 --- a/tensorflow/compiler/xla/g3doc/tutorials/xla_compile.ipynb +++ /dev/null @@ -1,373 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "The XLA compile API", - "version": "0.3.2", - "provenance": [], - "collapsed_sections": [], - "toc_visible": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - } - }, - "cells": [ - { - "metadata": { - "colab_type": "text", - "id": "f4TSNCvpENrW" - }, - "cell_type": "markdown", - "source": [ - "##### Copyright 2018 The TensorFlow Authors." - ] - }, - { - "metadata": { - "cellView": "form", - "colab_type": "code", - "id": "vamNSA0vEP-m", - "colab": {} - }, - "cell_type": "code", - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "colab_type": "text", - "id": "e1oSi4lHFt3z" - }, - "cell_type": "markdown", - "source": [ - "# The XLA compile API" - ] - }, - { - "metadata": { - "colab_type": "text", - "id": "b7noD9NjFRL-" - }, - "cell_type": "markdown", - "source": [ - "\n", - " \n", - " \n", - " \n", - "
\n", - " View on TensorFlow.org\n", - " \n", - " Run in Google Colab\n", - " \n", - " View source on GitHub\n", - "
" - ] - }, - { - "metadata": { - "colab_type": "text", - "id": "v9YbsuLZaBXy" - }, - "cell_type": "markdown", - "source": [ - "\n", - "\n", - "Import TensorFlow and the XLA library. XLA contains `xla.compile()`, an API that compiles part or all of a model with [XLA](https://www.tensorflow.org/extend/xla/)." - ] - }, - { - "metadata": { - "colab_type": "code", - "id": "45kUPj5ZFrRa", - "colab": {} - }, - "cell_type": "code", - "source": [ - "import tensorflow as tf\n", - "\n", - "from tensorflow.contrib.compiler import xla" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "colab_type": "text", - "id": "GZVNiRmTDV-5" - }, - "cell_type": "markdown", - "source": [ - "Define some necessary constants and prepare the MNIST dataset." - ] - }, - { - "metadata": { - "colab_type": "code", - "id": "f37TSEGvGX4_", - "colab": {} - }, - "cell_type": "code", - "source": [ - "# Size of each input image, 28 x 28 pixels\n", - "IMAGE_SIZE = 28 * 28\n", - "# Number of distinct number labels, [0..9]\n", - "NUM_CLASSES = 10\n", - "# Number of examples in each training batch (step)\n", - "TRAIN_BATCH_SIZE = 100\n", - "# Number of training steps to run\n", - "TRAIN_STEPS = 1000" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "colab_type": "code", - "id": "TiVXchblG5hK", - "colab": {} - }, - "cell_type": "code", - "source": [ - "# Loads MNIST dataset.\n", - "train, test = tf.keras.datasets.mnist.load_data()\n", - "train_ds = tf.data.Dataset.from_tensor_slices(train).batch(TRAIN_BATCH_SIZE).repeat()\n", - "test_ds = tf.data.Dataset.from_tensor_slices(test).batch(TRAIN_BATCH_SIZE)\n", - "\n", - "iterator = tf.data.Iterator.from_structure(train_ds.output_types, train_ds.output_shapes)\n", - "images, labels = iterator.get_next()\n", - "images = tf.reshape(images, [-1, IMAGE_SIZE])\n", - "images, labels = tf.cast(images, tf.float32), tf.cast(labels, tf.int64)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "colab_type": "text", - "id": "x_ZehpZP-SfS" - }, - "cell_type": "markdown", - "source": [ - "# Define the model constructing function\n", - "\n", - "Following code block contains a function that constructs a simple model with one dense layer, including both forward and backward propagation.\n", - "\n", - "When called, it returns two values. `y` is a `tf.Tensor` representing predicted probability of each target class, `train_step` is a `tf.Operation` that increments `global_step` and applies variable update." - ] - }, - { - "metadata": { - "colab_type": "code", - "id": "ZbhJl_WvGa3g", - "colab": {} - }, - "cell_type": "code", - "source": [ - "def build_mnist_model(x, y_):\n", - " y = tf.keras.layers.Dense(NUM_CLASSES).apply(x)\n", - "\n", - " cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=y_, logits=y)\n", - " train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)\n", - "\n", - " return y, train_step" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "colab_type": "text", - "id": "7Jh3lyQHDfM9" - }, - "cell_type": "markdown", - "source": [ - "# Enable XLA\n", - "\n", - "Use `xla.compile` with the `build_mnist_model` function to enable XLA. Following code block wraps the model with `xla.compile()`, which allows the target function with provided inputs to be executed by XLA." 
- ] - }, - { - "metadata": { - "colab_type": "code", - "id": "kYpCXCdRHNuN", - "colab": {} - }, - "cell_type": "code", - "source": [ - "[y] = xla.compile(build_mnist_model, inputs=[images, labels])" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "colab_type": "text", - "id": "4giQh62IrZGF" - }, - "cell_type": "markdown", - "source": [ - "When compiling the graph, XLA replaces all the graph nodes constructed in the target function with a few XLA ops.\n", - "\n", - "xla.compile does not return any\n", - "`tf.Operation` nodes that can be executed independently from the generated XLA ops. Instead, returned `tf.Operation` nodes from the target function are added as control dependencies of all returned `tf.Tensor` values. This triggers execution of the `tf.Operation` nodes when the returned tensors are evaluated.\n", - "\n", - "In pseudo-code, xla.compile's implementation looks as follows:\n", - "\n", - "---\n", - "```\n", - "# Ask Tensorflow to execute code in XLA-friendly manner\n", - "\n", - "y, train_step = build_mnist_model(images, labels)\n", - "with tf.control_dependencies([train_step]):\n", - " y = tf.identity(y)\n", - "\n", - "# Ask Tensorflow to STOP executing code in XLA-friendly manner\n", - "```\n", - "---\n", - "\n", - "xla.compile() always returns a list of `tf.Tensor`'s (even if there is only one-element)." - ] - }, - { - "metadata": { - "colab_type": "text", - "id": "TPGas4jjFLZl" - }, - "cell_type": "markdown", - "source": [ - "If you were to print the constructed graph now, you will see that it is not much different from a normal Tensorflow graph and you won't be able to find XLA ops mentioned before. This is because the actual compilation happens later when you try to execute the graph with `sess.run()`. At that time, Tensorflow triggers a series of graph rewrite passes that actually generate XLA ops, which compiles and executes computation when all inputs are ready." - ] - }, - { - "metadata": { - "colab_type": "text", - "id": "EZD1m_n1DxAF" - }, - "cell_type": "markdown", - "source": [ - "# Train and test the model" - ] - }, - { - "metadata": { - "colab_type": "code", - "id": "qe28bAHNHUG2", - "colab": {} - }, - "cell_type": "code", - "source": [ - "# Creates session and initialize all variables.\n", - "# xla.compile() doesn't work with Keras model.fit() API or TF eager mode yet.\n", - "sess = tf.Session()\n", - "sess.run(tf.global_variables_initializer())" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "colab_type": "text", - "id": "qgsKmz3n2UiW" - }, - "cell_type": "markdown", - "source": [ - "Following code block trains model. Evaluating `y` also triggers its control dependency node `train_step`, which updates model variables." 
- ] - }, - { - "metadata": { - "colab_type": "code", - "id": "_GxF6jTRHVuA", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - }, - "outputId": "fbf299ca-02d5-4e95-f9fe-8f3c0432d132" - }, - "cell_type": "code", - "source": [ - "# Feeds training dataset\n", - "sess.run(iterator.make_initializer(train_ds))\n", - "\n", - "# Runs TRAIN_STEPS steps\n", - "for i in range(TRAIN_STEPS):\n", - " sess.run(y)\n", - "\n", - "print(\"Model trained for %s steps.\" % TRAIN_STEPS)" - ], - "execution_count": 21, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Model trained for 1000 steps.\n" - ], - "name": "stdout" - } - ] - }, - { - "metadata": { - "colab_type": "code", - "id": "dHlQlRSRHXD1", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - }, - "outputId": "9c3677a2-ec84-406f-9d2c-d722844f3093" - }, - "cell_type": "code", - "source": [ - "# Tests trained model\n", - "\n", - "# Feeds testing dataset\n", - "sess.run(iterator.make_initializer(test_ds))\n", - "\n", - "# Calculates accuracy\n", - "correct_prediction = tf.equal(tf.argmax(y, 1), labels)\n", - "accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))\n", - "print(\"Prediction accuracy after training: %s\" % sess.run(accuracy))" - ], - "execution_count": 22, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Prediction accuracy after training: 0.91\n" - ], - "name": "stdout" - } - ] - }, - { - "metadata": { - "colab_type": "code", - "id": "ynJQIuzjHYOb", - "colab": {} - }, - "cell_type": "code", - "source": [ - "# Cleans up session\n", - "sess.close()" - ], - "execution_count": 0, - "outputs": [] - } - ] -} diff --git a/tensorflow/compiler/xla/layout.cc b/tensorflow/compiler/xla/layout.cc index 5f0b5c62187..d234e729688 100644 --- a/tensorflow/compiler/xla/layout.cc +++ b/tensorflow/compiler/xla/layout.cc @@ -52,7 +52,6 @@ string Tile::ToString() const { for (const int64 dimension : proto.minor_to_major()) { layout.add_minor_to_major(dimension); } - layout.set_max_sparse_elements(proto.max_sparse_elements()); for (const TileProto& tile_proto : proto.tiles()) { *layout.add_tiles() = Tile::CreateFromProto(tile_proto); } @@ -68,7 +67,6 @@ LayoutProto Layout::ToProto() const { for (const int64 dimension : minor_to_major()) { proto.add_minor_to_major(dimension); } - proto.set_max_sparse_elements(max_sparse_elements_); for (const Tile& tile : tiles()) { *proto.add_tiles() = tile.ToProto(); } @@ -78,10 +76,7 @@ LayoutProto Layout::ToProto() const { } string Layout::ToString() const { - if (format() == SPARSE) { - CHECK_EQ(tiles_size(), 0) << "Sparse layout should not be tiled."; - return absl::StrCat("sparse{", max_sparse_elements(), "}"); - } else if (format() == DENSE) { + if (format() == DENSE) { string colon_string = tiles().empty() ? 
"" : "T"; for (Tile tile : tiles()) { absl::StrAppend(&colon_string, tile.ToString()); @@ -107,10 +102,6 @@ bool Layout::Equal::operator()(const Layout& lhs, const Layout& rhs) { if (lhs.format() == DENSE && lhs.minor_to_major() != rhs.minor_to_major()) { return false; } - if (lhs.format() == SPARSE && - lhs.max_sparse_elements() != rhs.max_sparse_elements()) { - return false; - } if (!ignore_tiles_ && lhs.tiles() != rhs.tiles()) { return false; } diff --git a/tensorflow/compiler/xla/layout.h b/tensorflow/compiler/xla/layout.h index 1234d01755b..fd6d62ac2f7 100644 --- a/tensorflow/compiler/xla/layout.h +++ b/tensorflow/compiler/xla/layout.h @@ -203,12 +203,6 @@ class Layout { absl::Span tiles() const { return tiles_; } absl::InlinedVector* mutable_tiles() { return &tiles_; } - // Methods for accessing the int64 fields. - int64 max_sparse_elements() const { return max_sparse_elements_; } - Layout& set_max_sparse_elements(int64 value) { - max_sparse_elements_ = value; - return *this; - } int64 element_size_in_bits() const { return element_size_in_bits_; } Layout& set_element_size_in_bits(int64 value) { element_size_in_bits_ = value; @@ -233,8 +227,7 @@ class Layout { template friend H AbslHashValue(H h, const Layout& l) { - return H::combine(std::move(h), l.format_, l.minor_to_major_, - l.max_sparse_elements_, l.tiles_, + return H::combine(std::move(h), l.format_, l.minor_to_major_, l.tiles_, l.element_size_in_bits_); } @@ -255,11 +248,6 @@ class Layout { // And the major dim is [8,100,100,3][1], which is size 100. absl::InlinedVector minor_to_major_; - // The maximum number of elements that can be stored for SPARSE formats. This - // can be used to determine the maximum size in bytes of arrays stored in - // memory. This field must be zero unless the format is SPARSE. - int64 max_sparse_elements_ = 0; - // The tiles used in tiling-based layout. 
absl::InlinedVector tiles_; diff --git a/tensorflow/compiler/xla/layout_test.cc b/tensorflow/compiler/xla/layout_test.cc index 26805c5c0a2..7bcc19c9725 100644 --- a/tensorflow/compiler/xla/layout_test.cc +++ b/tensorflow/compiler/xla/layout_test.cc @@ -34,8 +34,6 @@ class LayoutTest : public ::testing::Test {}; TEST_F(LayoutTest, ToString) { EXPECT_EQ(Layout().ToString(), "invalid{}"); EXPECT_EQ(Layout({4, 5, 6}).ToString(), "{4,5,6}"); - EXPECT_EQ(Layout().set_format(SPARSE).set_max_sparse_elements(123).ToString(), - "sparse{123}"); EXPECT_EQ(Layout({4, 5, 6}).ToString(), "{4,5,6}"); EXPECT_EQ(Layout({3, 2, 1, 0}, {Tile({42, 123}), Tile({4, 5})}).ToString(), "{3,2,1,0:T(42,123)(4,5)}"); @@ -65,11 +63,6 @@ TEST_F(LayoutTest, StreamOut) { } } -TEST_F(LayoutTest, SparseLayoutMaxElements) { - EXPECT_EQ(LayoutUtil::MaxSparseElements(LayoutUtil::MakeSparseLayout(101)), - 101); -} - TEST_F(LayoutTest, Equality) { EXPECT_EQ(Layout(), Layout()); const std::vector empty_dims; @@ -90,12 +83,6 @@ TEST_F(LayoutTest, Equality) { Layout({0, 1, 2}).set_memory_space(3)); EXPECT_NE(Layout({0, 1, 2}).set_memory_space(1), Layout({0, 1, 2}).set_memory_space(3)); - EXPECT_EQ(Layout().set_format(SPARSE), Layout().set_format(SPARSE)); - EXPECT_EQ(Layout().set_format(SPARSE).set_max_sparse_elements(42), - Layout().set_format(SPARSE).set_max_sparse_elements(42)); - EXPECT_NE(Layout().set_format(SPARSE).set_max_sparse_elements(42), - Layout().set_format(SPARSE).set_max_sparse_elements(24)); - EXPECT_FALSE( Layout::Equal()(Layout({0, 1, 2}, {Tile({42, 44})}), Layout({0, 1, 2}))); EXPECT_TRUE(Layout::Equal().IgnoreTiles()(Layout({0, 1, 2}, {Tile({42, 44})}), @@ -117,8 +104,6 @@ TEST_F(LayoutTest, LayoutToFromProto) { expect_unchanged(Layout()); expect_unchanged(Layout({1, 3, 2, 0})); - expect_unchanged(Layout().set_format(SPARSE)); - expect_unchanged(Layout().set_format(SPARSE).set_max_sparse_elements(123)); expect_unchanged(Layout({0, 1}).set_element_size_in_bits(42)); expect_unchanged(Layout({3, 2, 1, 0}, {Tile({42, 123}), Tile({4, 5})})); } diff --git a/tensorflow/compiler/xla/layout_util.cc b/tensorflow/compiler/xla/layout_util.cc index 45572d9062e..6f8ece1bb10 100644 --- a/tensorflow/compiler/xla/layout_util.cc +++ b/tensorflow/compiler/xla/layout_util.cc @@ -94,13 +94,6 @@ void SetDefaultLayoutToContainer(T* minor_to_major) { return layout; } -/* static */ Layout LayoutUtil::MakeSparseLayout(int64 max_sparse_elements) { - Layout layout; - layout.set_format(SPARSE); - layout.set_max_sparse_elements(max_sparse_elements); - return layout; -} - namespace { // Internal helper that creates a default layout for an array of the given rank. @@ -293,19 +286,6 @@ Layout CreateDefaultLayoutForRank(int64 rank) { layout.minor_to_major().end(), std::greater()); } -/* static */ bool LayoutUtil::IsSparseArray(const Shape& shape) { - return shape.IsArray() && shape.has_layout() && IsSparse(shape.layout()); -} - -/* static */ bool LayoutUtil::IsSparse(const Layout& layout) { - return layout.format() == SPARSE; -} - -/* static */ int64 LayoutUtil::MaxSparseElements(const Layout& layout) { - CHECK(IsSparse(layout)); - return layout.max_sparse_elements(); -} - /* static */ bool LayoutUtil::HasLayout(const Shape& shape) { if (shape.IsTuple()) { // Tuple shape: all subshapes must have a layout. 
@@ -461,8 +441,6 @@ Status LayoutUtil::CopyLayoutBetweenShapes(const Shape& src, Shape* dst) { for (int64 minor_to_major : layout.minor_to_major()) { hash_value = Hash64Combine(hash_value, hash()(minor_to_major)); } - hash_value = Hash64Combine(hash_value, layout.max_sparse_elements()); - for (Tile tile : layout.tiles()) { for (int64 tile_dim : tile.dimensions()) { hash_value = Hash64Combine(hash_value, hash()(tile_dim)); diff --git a/tensorflow/compiler/xla/layout_util.h b/tensorflow/compiler/xla/layout_util.h index b391220ade9..60e135de354 100644 --- a/tensorflow/compiler/xla/layout_util.h +++ b/tensorflow/compiler/xla/layout_util.h @@ -49,10 +49,6 @@ class LayoutUtil { // dimensions. static Layout MakeDescendingLayout(int64 rank); - // Creates a sparse layout with the given maximum number of elements. (This is - // a convenience function for protobuf construction.) - static Layout MakeSparseLayout(int64 max_sparse_elements); - // Returns default layout for the given shape. static Layout GetDefaultLayoutForShape(const Shape& shape); @@ -109,17 +105,6 @@ class LayoutUtil { // more minor, and so on until dimension N-1 which is the minor. static bool IsMonotonicWithDim0Major(const Layout& layout); - // Returns whether the given Shape is an array (i.e. not a tuple) and has a - // sparse format layout. - static bool IsSparseArray(const Shape& shape); - - // Returns whether the given Layout has a sparse format. - static bool IsSparse(const Layout& layout); - - // Returns the maximum number of elements that can be stored in a sparse - // layout. - static int64 MaxSparseElements(const Layout& layout); - // Returns whether the given shape has a layout. For tuple shapes, true is // returned only if all elements have layouts. static bool HasLayout(const Shape& shape); diff --git a/tensorflow/compiler/xla/layout_util_test.cc b/tensorflow/compiler/xla/layout_util_test.cc index 12da2140636..398baa13fca 100644 --- a/tensorflow/compiler/xla/layout_util_test.cc +++ b/tensorflow/compiler/xla/layout_util_test.cc @@ -33,14 +33,6 @@ class LayoutUtilTest : public ::testing::Test { *shape.mutable_layout() = LayoutUtil::MakeLayout(minor_to_major); return shape; } - - Shape MakeShapeWithSparseLayout(PrimitiveType element_type, - absl::Span dimensions, - int64 max_sparse_elements) { - Shape shape = ShapeUtil::MakeShape(element_type, dimensions); - *shape.mutable_layout() = LayoutUtil::MakeSparseLayout(max_sparse_elements); - return shape; - } }; TEST_F(LayoutUtilTest, TupleLayoutComparison) { @@ -92,29 +84,6 @@ TEST_F(LayoutUtilTest, CopyLayoutArray) { EXPECT_FALSE(dst.has_layout()); } -TEST_F(LayoutUtilTest, CopyLayoutSparse) { - Shape src = MakeShapeWithSparseLayout(F32, {2, 3}, 2); - Shape dst = MakeShapeWithLayout(F32, {2, 3}, {1, 0}); - - EXPECT_FALSE(LayoutUtil::LayoutsInShapesEqual(src, dst)); - EXPECT_IS_OK(LayoutUtil::CopyLayoutBetweenShapes(src, &dst)); - EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst)); - - // Should work if destination has no layout. - dst.clear_layout(); - EXPECT_FALSE(LayoutUtil::LayoutsInShapesEqual(src, dst)); - EXPECT_IS_OK(LayoutUtil::CopyLayoutBetweenShapes(src, &dst)); - EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst)); - - // If source is cleared, then destination should be cleared. 
- src.clear_layout(); - EXPECT_FALSE(LayoutUtil::LayoutsInShapesEqual(src, dst)); - EXPECT_TRUE(dst.has_layout()); - EXPECT_IS_OK(LayoutUtil::CopyLayoutBetweenShapes(src, &dst)); - EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst)); - EXPECT_FALSE(dst.has_layout()); -} - TEST_F(LayoutUtilTest, CopyLayoutTuple) { Shape src = ShapeUtil::MakeTupleShape( {MakeShapeWithLayout(F32, {2, 3}, {0, 1}), @@ -134,25 +103,6 @@ TEST_F(LayoutUtilTest, CopyLayoutTuple) { EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst)); } -TEST_F(LayoutUtilTest, CopyLayoutTupleSparse) { - Shape src = ShapeUtil::MakeTupleShape( - {MakeShapeWithSparseLayout(F32, {2, 3}, 4), - MakeShapeWithSparseLayout(F32, {42, 123}, 4), - ShapeUtil::MakeTupleShape( - {MakeShapeWithLayout(F32, {}, {}), - MakeShapeWithSparseLayout(F32, {1, 2, 3}, 6)})}); - Shape dst = ShapeUtil::MakeTupleShape( - {MakeShapeWithLayout(F32, {2, 3}, {1, 0}), - MakeShapeWithLayout(F32, {42, 123}, {1, 0}), - ShapeUtil::MakeTupleShape( - {MakeShapeWithLayout(F32, {}, {}), - MakeShapeWithLayout(F32, {1, 2, 3}, {1, 2, 0})})}); - - EXPECT_FALSE(LayoutUtil::LayoutsInShapesEqual(src, dst)); - EXPECT_IS_OK(LayoutUtil::CopyLayoutBetweenShapes(src, &dst)); - EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst)); -} - TEST_F(LayoutUtilTest, CopyLayoutNotCompatibleSameRank) { Shape src = MakeShapeWithLayout(F32, {123, 42, 7}, {2, 0, 1}); Shape dst = MakeShapeWithLayout(F32, {2, 3, 5}, {1, 0}); @@ -160,13 +110,6 @@ TEST_F(LayoutUtilTest, CopyLayoutNotCompatibleSameRank) { EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst)); } -TEST_F(LayoutUtilTest, CopyLayoutSparseNotCompatibleSameRank) { - Shape src = MakeShapeWithSparseLayout(F32, {123, 42, 7}, 6); - Shape dst = MakeShapeWithLayout(F32, {2, 3, 5}, {1, 0}); - ASSERT_IS_OK(LayoutUtil::CopyLayoutBetweenShapes(src, &dst)); - EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst)); -} - TEST_F(LayoutUtilTest, CopyLayoutNotCompatibleDifferentRank) { Shape src = MakeShapeWithLayout(F32, {123, 42, 7}, {2, 0, 1}); Shape dst = MakeShapeWithLayout(F32, {2, 3}, {1, 0}); @@ -176,15 +119,6 @@ TEST_F(LayoutUtilTest, CopyLayoutNotCompatibleDifferentRank) { ::testing::ContainsRegex("cannot copy layout from shape")); } -TEST_F(LayoutUtilTest, CopyLayoutSparseNotCompatibleDifferentRank) { - Shape src = MakeShapeWithLayout(F32, {123, 42, 7}, {2, 0, 1}); - Shape dst = MakeShapeWithSparseLayout(F32, {2, 3}, 4); - auto status = LayoutUtil::CopyLayoutBetweenShapes(src, &dst); - EXPECT_FALSE(status.ok()); - EXPECT_THAT(status.error_message(), - ::testing::ContainsRegex("cannot copy layout from shape")); -} - TEST_F(LayoutUtilTest, CopyLayoutNotCompatibleTuple) { Shape src = ShapeUtil::MakeTupleShape({MakeShapeWithLayout(F32, {2, 3}, {0, 1}), diff --git a/tensorflow/compiler/xla/literal.cc b/tensorflow/compiler/xla/literal.cc index 3d6310c1e17..6c7aff3b11e 100644 --- a/tensorflow/compiler/xla/literal.cc +++ b/tensorflow/compiler/xla/literal.cc @@ -80,7 +80,7 @@ bool LiteralProtoHasValues(const LiteralProto& proto) { proto.c64s_size() || proto.c128s_size() || proto.tuple_literals_size() || !proto.f16s().empty() || !proto.bf16s().empty() || !proto.u16s().empty() || - !proto.s16s().empty() || proto.sparse_indices_size(); + !proto.s16s().empty(); } } // namespace @@ -135,21 +135,8 @@ void Literal::SetPiece(const Shape& shape, Piece* piece, bool allocate_arrays) { // Literals can be used as DMA targets, which can require alignment. We // force a 16-byte minimum alignment. 
constexpr int kMinimumAlignment = 16; - if (LayoutUtil::IsSparseArray(shape)) { - // For sparse arrays, the buffer must be of the size of the maximum - // number of sparse elements possible. - const int64 max_sparse_elements = - LayoutUtil::MaxSparseElements(shape.layout()); - piece->set_buffer(static_cast(tensorflow::port::AlignedMalloc( - max_sparse_elements * - ShapeUtil::ByteSizeOfPrimitiveType(shape.element_type()), - kMinimumAlignment))); - piece->set_sparse_indices( - new SparseIndexArray(max_sparse_elements, shape.rank())); - } else { - piece->set_buffer(static_cast(tensorflow::port::AlignedMalloc( - piece->size_bytes(), kMinimumAlignment))); - } + piece->set_buffer(static_cast(tensorflow::port::AlignedMalloc( + piece->size_bytes(), kMinimumAlignment))); } } else { // If the shape is neither an array nor tuple, then it must be @@ -181,7 +168,6 @@ void Literal::DeallocateBuffers() { [&](const ShapeIndex& index, Piece* piece) { if (piece->buffer() != nullptr) { tensorflow::port::AlignedFree(piece->buffer()); - delete piece->sparse_indices(); } }); } @@ -211,16 +197,6 @@ Literal LiteralBase::CreateFromShape(const Shape& shape) { return literal; } -const SparseIndexArray* LiteralBase::sparse_indices( - const ShapeIndex& shape_index) const { - return piece(shape_index).sparse_indices(); -} - -SparseIndexArray* MutableLiteralBase::sparse_indices( - const ShapeIndex& shape_index) { - return piece(shape_index).sparse_indices(); -} - template Status MutableLiteralBase::CopySliceFromInternal( const LiteralBase& src_literal, absl::Span src_base, @@ -373,12 +349,9 @@ std::vector Literal::DecomposeTuple() { } Piece& src_piece = piece(src_index); - // Move the respective buffer and sparse indices over to the element - // Literal. + // Move the respective buffer over to the element Literal. dest_piece->set_buffer(src_piece.buffer()); src_piece.set_buffer(nullptr); - dest_piece->set_sparse_indices(src_piece.sparse_indices()); - src_piece.set_sparse_indices(nullptr); }); } // Set this literal to be nil-shaped. @@ -512,8 +485,6 @@ Status Literal::MoveFrom(Literal&& src_literal, Piece& dest_piece = piece(dest_index); tensorflow::port::AlignedFree(dest_piece.buffer()); dest_piece.set_buffer(src_piece.buffer()); - delete dest_piece.sparse_indices(); - dest_piece.set_sparse_indices(src_piece.sparse_indices()); }); src_literal.shape_ = absl::make_unique(ShapeUtil::MakeNil()); @@ -738,14 +709,14 @@ Literal LiteralBase::SliceInternal( const Shape& result_shape, absl::Span start_indices) const { Literal result_literal(result_shape); DimensionVector new_indices(result_shape.rank()); - result_literal.EachCell( - [&](absl::Span indices, NativeT /*value*/) { - for (int64 i = 0; i < result_shape.rank(); ++i) { - new_indices[i] = indices[i] + start_indices[i]; - } - NativeT value = Get(new_indices); - result_literal.Set(indices, value); - }); + CHECK(result_literal + .Populate([&](absl::Span indices) { + for (int64 i = 0; i < result_shape.rank(); ++i) { + new_indices[i] = indices[i] + start_indices[i]; + } + return Get(new_indices); + }) + .ok()); return result_literal; } @@ -854,66 +825,6 @@ string LiteralBase::GetAsString(absl::Span multi_index, } } -string LiteralBase::GetSparseElementAsString( - int64 sparse_element_number, const ShapeIndex& shape_index) const { - const Shape& subshape = ShapeUtil::GetSubshape(shape(), shape_index); - CHECK(LayoutUtil::IsSparseArray(subshape)); - switch (subshape.element_type()) { - case PRED: - return GetSparseElement(sparse_element_number, shape_index) - ? 
"true" - : "false"; - case S8: - return StrCat(GetSparseElement(sparse_element_number, shape_index)); - case S16: - return StrCat( - GetSparseElement(sparse_element_number, shape_index)); - case S32: - return StrCat( - GetSparseElement(sparse_element_number, shape_index)); - case S64: - return StrCat( - GetSparseElement(sparse_element_number, shape_index)); - case U8: - return StrCat( - GetSparseElement(sparse_element_number, shape_index)); - case U16: - return StrCat( - GetSparseElement(sparse_element_number, shape_index)); - case U32: - return StrCat( - GetSparseElement(sparse_element_number, shape_index)); - case U64: - return StrCat( - GetSparseElement(sparse_element_number, shape_index)); - case F16: - return StrCat(static_cast( - GetSparseElement(sparse_element_number, shape_index))); - case F32: - return StrCat( - GetSparseElement(sparse_element_number, shape_index)); - case BF16: - return StrCat(static_cast( - GetSparseElement(sparse_element_number, shape_index))); - case F64: - return StrCat( - GetSparseElement(sparse_element_number, shape_index)); - case C64: { - complex64 c = - GetSparseElement(sparse_element_number, shape_index); - return StrCat("(", c.real(), ", ", c.imag(), ")"); - } - case C128: { - complex128 c = - GetSparseElement(sparse_element_number, shape_index); - return StrCat("(", c.real(), ", ", c.imag(), ")"); - } - default: - LOG(FATAL) << "Invalid element type for sparse arrays: " - << PrimitiveType_Name(subshape.element_type()); - } -} - absl::optional LiteralBase::GetIntegralAsS64( absl::Span multi_index) const { CHECK(LayoutUtil::IsDenseArray(shape())); @@ -1047,81 +958,6 @@ Status MutableLiteralBase::SetFromDouble(absl::Span multi_index, return Status::OK(); } -absl::Span LiteralBase::GetSparseIndex( - int64 sparse_element_number, const ShapeIndex& shape_index) const { - const Piece& p = piece(shape_index); - CHECK_GE(sparse_element_number, 0); - CHECK_LT(sparse_element_number, p.sparse_indices()->index_count()); - return p.sparse_indices()->At(sparse_element_number); -} - -void MutableLiteralBase::SortSparseElements(const ShapeIndex& shape_index) { - piece(shape_index).SortSparseElements(); -} - -void LiteralBase::Piece::SortSparseElements() { - switch (subshape().element_type()) { - case PRED: - SortSparseElementsInternal(); - break; - case S8: - SortSparseElementsInternal(); - break; - case U8: - SortSparseElementsInternal(); - break; - case S16: - SortSparseElementsInternal(); - break; - case U16: - SortSparseElementsInternal(); - break; - case S32: - SortSparseElementsInternal(); - break; - case U32: - SortSparseElementsInternal(); - break; - case S64: - SortSparseElementsInternal(); - break; - case U64: - SortSparseElementsInternal(); - break; - case F32: - SortSparseElementsInternal(); - break; - case F64: - SortSparseElementsInternal(); - break; - case C64: - SortSparseElementsInternal(); - break; - case C128: - SortSparseElementsInternal(); - break; - case F16: - SortSparseElementsInternal(); - break; - case BF16: - SortSparseElementsInternal(); - break; - default: - LOG(FATAL) << "Element type not valid for sparse array: " - << PrimitiveType_Name(subshape().element_type()); - } -} - -template -void LiteralBase::Piece::SortSparseElementsInternal() { - CHECK(LayoutUtil::IsSparseArray(subshape())); - int64 num_elements = sparse_indices()->index_count(); - auto values = data(); - CHECK_LE(num_elements, values.size()); - sparse_indices()->SortWithValues( - absl::Span(values.data(), num_elements)); -} - namespace { string ShapeToString(bool print_layout, 
const Shape& shape) { @@ -1151,32 +987,6 @@ void TupleToStringHelper(const LiteralBase& literal, pieces->push_back("\n)"); } -void SparseArrayToStringHelper(const LiteralBase& literal, - const Shape& subshape, bool print_shape, - bool print_layout, std::vector* pieces) { - if (print_shape) { - pieces->push_back(ShapeToString(print_layout, subshape)); - } - pieces->push_back("{"); - int64 rank = subshape.rank(); - int64 num_elements = literal.sparse_element_count(); - for (int64 i = 0; i < num_elements; ++i) { - if (i > 0) { - pieces->push_back(", "); - } - if (rank == 1) { - pieces->push_back(StrCat(literal.GetSparseIndex(i)[0])); - pieces->push_back(": "); - } else { - pieces->push_back("["); - pieces->push_back(absl::StrJoin(literal.GetSparseIndex(i), ", ")); - pieces->push_back("]: "); - } - pieces->push_back(literal.GetSparseElementAsString(i)); - } - pieces->push_back("}"); -} - void DenseArrayToStringHelper(const LiteralBase& literal, const ShapeIndex& shape_index, bool print_shape, bool print_layout, std::vector* pieces) { @@ -1261,9 +1071,6 @@ void ToStringHelper(const LiteralBase& literal, const ShapeIndex& shape_index, pieces); } else if (subshape.IsToken()) { pieces->push_back("token"); - } else if (LayoutUtil::IsSparseArray(subshape)) { - SparseArrayToStringHelper(literal, subshape, print_shape, print_layout, - pieces); } else { CHECK(LayoutUtil::IsDenseArray(subshape)); DenseArrayToStringHelper(literal, shape_index, print_shape, print_layout, @@ -1273,11 +1080,6 @@ void ToStringHelper(const LiteralBase& literal, const ShapeIndex& shape_index, } // namespace -int64 LiteralBase::sparse_element_count() const { - CHECK(LayoutUtil::IsSparseArray(shape())); - return sparse_indices()->index_count(); -} - string LiteralBase::ToString() const { std::vector pieces; CHECK(LayoutUtil::HasLayout(this->shape())); @@ -2053,22 +1855,6 @@ Status LiteralBase::Piece::CopyFromProto(const LiteralProto& proto) { TF_RET_CHECK(LayoutUtil::HasLayout(shape)); TF_RET_CHECK(ShapeUtil::Equal(shape, subshape())); - if (LayoutUtil::IsSparseArray(subshape())) { - // Compute the number of elements (indices) in the sparse shape and reserve - // the necessary space in spare_indices. - TF_RET_CHECK(subshape().rank() != 0) << "Scalar shapes cannot be sparse"; - TF_RET_CHECK(proto.sparse_indices_size() % subshape().rank() == 0) - << "Unexpected number of indices in proto (" - << proto.sparse_indices_size() << ") for shape of rank " - << subshape().rank(); - const int64 index_count = proto.sparse_indices_size() / subshape().rank(); - sparse_indices()->Resize(index_count); - - // Copy the indices from the proto into the SparseIndexArray object. 
- TF_RETURN_IF_ERROR(CopyFromRepeatedField(sparse_indices()->mutable_data(), - proto.sparse_indices())); - } - switch (subshape().element_type()) { case PRED: TF_RETURN_IF_ERROR(CopyFromRepeatedField(data(), proto.preds())); @@ -2175,11 +1961,6 @@ LiteralProto LiteralBase::ToProto() const { piece.WriteToProto(proto_piece); }); - if (LayoutUtil::IsSparseArray(shape())) { - CopyToRepeatedField(proto.mutable_sparse_indices(), - sparse_indices()->data()); - } - return proto; } @@ -2295,12 +2076,6 @@ MutableBorrowingLiteral::MutableBorrowingLiteral(const char* src_buf_ptr, MutableBorrowingLiteral::~MutableBorrowingLiteral() { if (root_piece_ != nullptr) { - root_piece_->ForEachMutableSubpiece( - [&](const ShapeIndex& index, Piece* piece) { - if (piece->buffer() != nullptr) { - delete piece->sparse_indices(); - } - }); delete root_piece_; } } diff --git a/tensorflow/compiler/xla/literal.h b/tensorflow/compiler/xla/literal.h index 2d27f8eb7f6..7aee34437e6 100644 --- a/tensorflow/compiler/xla/literal.h +++ b/tensorflow/compiler/xla/literal.h @@ -35,7 +35,6 @@ limitations under the License. #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/primitive_util.h" #include "tensorflow/compiler/xla/shape_util.h" -#include "tensorflow/compiler/xla/sparse_index_array.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/util.h" @@ -77,11 +76,6 @@ class LiteralBase { template absl::Span data(const ShapeIndex& shape_index = {}) const; - // Returns a const pointer to the sparse index array. Returns nullptr if the - // literal is not a sparse array. - const SparseIndexArray* sparse_indices( - const ShapeIndex& shape_index = {}) const; - // Returns a const pointer to (or size of) the underlying buffer holding the // array at the given shape index. CHECKs if the subshape of the literal at // the given ShapeIndex is not array. @@ -126,10 +120,6 @@ class LiteralBase { // into text. string GetAsString(absl::Span multi_index, const ShapeIndex& shape_index = {}) const; - // As GetSparseElement(), but determines the correct type and converts the - // value into text. - string GetSparseElementAsString(int64 sparse_element_number, - const ShapeIndex& shape_index = {}) const; // Return whether the value at the specified index is equal to the provided // generic `value` (T must be an arithmetic type). @@ -172,21 +162,6 @@ class LiteralBase { absl::optional GetAsComplex128( absl::Span multi_index) const; - // Returns the multi-index of the element in a sparse literal at the given - // sparse element number. The sparse element number is the position with in - // the sparse array's list of (index, value) pairs, and is checked against the - // total number of (index, value) pairs in the sparse array. - absl::Span GetSparseIndex( - int64 sparse_element_number, const ShapeIndex& shape_index = {}) const; - - // Returns the value of the element in a sparse literal at the given sparse - // element number. The sparse element number is the position with in the - // sparse array's list of (index, value) pairs, and is checked against the - // total number of (index, value) pairs in the sparse array. - template - NativeT GetSparseElement(int64 sparse_element_number, - const ShapeIndex& shape_index = {}) const; - // Invokes the "per cell" callback for each element in the provided // literal with the element's indices and a string representation of // the element's value. 
@@ -259,13 +234,7 @@ class LiteralBase { return ShapeUtil::ElementsIn(ShapeUtil::GetSubshape(shape(), index)); } - // Returns the count of the elements in the sparse array at the given shape - // index in this literal, which will be no larger than - // LayoutUtil::MaxSparseElements(SetSubshape(shape(), index).layout()). - int64 sparse_element_count() const; - - // Compute a hash for this literal. This literal must not be a sparse tensor - // or a tuple containing a sparse tensor. + // Compute a hash for this literal. size_t Hash() const; // Converts this literal to the given shape. Returns an error is the @@ -385,14 +354,6 @@ class LiteralBase { char* buffer() const { return buffer_; } void set_buffer(char* buffer) { buffer_ = buffer; } - // The array of multi-indices that provide the locations of non-zero - // elements in a sparse array. Only used if - // LayoutUtil::IsSparseArray(shape()) is true. - SparseIndexArray* sparse_indices() const { return sparse_indices_; } - void set_sparse_indices(SparseIndexArray* sparse_indices) { - sparse_indices_ = sparse_indices; - } - // Gets or sets the subshape of this piece. This reference points to a // subshape within the shape in the containing Literal (Literal::shape_). const Shape& subshape() const { return *subshape_; } @@ -402,13 +363,7 @@ class LiteralBase { int64 size_bytes() const { return ShapeUtil::ByteSizeOf(subshape()); } // Returns the number of elements in this piece's array. - int64 element_count() const { - // If this is a sparse array, use the number of elements represented by - // the indices in the associated SparseIndexArray. - return LayoutUtil::IsSparseArray(subshape()) - ? sparse_indices()->index_count() - : ShapeUtil::ElementsIn(subshape()); - } + int64 element_count() const { return ShapeUtil::ElementsIn(subshape()); } // Returns the child piece at 'index' of this piece. Piece& child(int64 index) { return children_[index]; } @@ -489,9 +444,6 @@ class LiteralBase { // piece must be equal (not just compatible) to the shape of the proto. Status CopyFromProto(const LiteralProto& proto); - // Sorts the elements in a sparse array. - void SortSparseElements(); - private: // Helpers for traversing the piece via ForEachSubpiece rooted at 'index'. // The first non-OK (or non-true) value is returned by the function. @@ -541,17 +493,9 @@ class LiteralBase { bool EqualElementsInternal(const Piece& other, std::vector* multi_index) const; - // Helper for SortSparseElements that has the element type as a template - // parameter. - template - void SortSparseElementsInternal(); - // For array-shaped pieces, this is the buffer holding the literal data. char* buffer_ = nullptr; - // For sparse arrays, this is the array of indices. - SparseIndexArray* sparse_indices_ = nullptr; - // The shape of piece. This points into the shape of the containing Literal // (Literal::shape_). const Shape* subshape_ = nullptr; @@ -598,10 +542,6 @@ class MutableLiteralBase : public LiteralBase { // Unhide const method from parent class. using LiteralBase::data; - // Returns a pointer to the sparse index array. Returns nullptr if the literal - // is not a sparse array. - SparseIndexArray* sparse_indices(const ShapeIndex& shape_index = {}); - // TODO(b/67651157): Remove this accessor. Literal users should not be able to // mutate the shape as this can produce malformed Literals. Shape* mutable_shape_do_not_use() { return shape_.get(); } @@ -613,16 +553,6 @@ class MutableLiteralBase : public LiteralBase { // Unhide const method from parent class. 
using LiteralBase::untyped_data; - // Populates a literal with a sparse layout with the given indices and values. - // Each index in the indices array is CHECKed against the dimensions in the - // literal's shape. If sort is true, then the indices and values will be - // sorted. If sort is false, then the indices and values are assumed to - // already be in sorted order. See CreateSparse for an example of how data - // are populated. - template - void PopulateSparse(SparseIndexArray indices, - absl::Span values, bool sort = true); - // Copy values from 'src_literal' rooted at 'src_shape_index' into this // literal rooted at 'dest_shape_index'. The subshape of this literal rooted // at 'dest_shape_index' must be compatible with the subshape of 'src_literal' @@ -661,16 +591,6 @@ class MutableLiteralBase : public LiteralBase { template void Set(absl::Span multi_index, NativeT value); - // Appends the given element to the literal. If the elements are not appended - // in sorted order, then SortSparseElements should be called before calling - // other methods. This literal must have a sparse layout. - template - void AppendSparseElement(absl::Span multi_index, NativeT value, - const ShapeIndex& shape_index = {}); - - // Sorts the elements in a sparse array. - void SortSparseElements(const ShapeIndex& shape_index = {}); - // As Set(), but truncates `value` to the literal element type before storing. // This literal must be an array. Status SetIntegralAsS64(absl::Span multi_index, int64 value); @@ -988,34 +908,6 @@ NativeT LiteralBase::GetFirstElement() const { return data().at(0); } -template -NativeT LiteralBase::GetSparseElement(int64 sparse_element_number, - const ShapeIndex& shape_index) const { - CHECK( - LayoutUtil::IsSparseArray(ShapeUtil::GetSubshape(shape(), shape_index))); - return data(shape_index)[sparse_element_number]; -} - -template -void MutableLiteralBase::AppendSparseElement( - absl::Span multi_index, NativeT value, - const ShapeIndex& shape_index) { - Piece& p = piece(shape_index); - const Shape& subshape = p.subshape(); - CHECK(LayoutUtil::IsSparseArray(subshape)); - int64 rank = subshape.rank(); - CHECK_EQ(multi_index.size(), rank); - for (int64 i = 0; i < rank; ++i) { - CHECK_GE(multi_index[i], 0); - CHECK_LT(multi_index[i], subshape.dimensions(i)); - } - int64 last_element = p.sparse_indices()->index_count(); - CHECK_LT(last_element, LayoutUtil::MaxSparseElements(subshape.layout())); - p.sparse_indices()->Append(multi_index); - CHECK_LT(last_element, p.data().size()); - p.data()[last_element] = value; -} - template void LiteralBase::EachCell( std::function indices, NativeT value)> @@ -1094,31 +986,6 @@ void MutableLiteralBase::PopulateR4FromArray4D(const Array4D& values) { PopulateFromArray(values); } -template -void MutableLiteralBase::PopulateSparse(SparseIndexArray indices, - absl::Span values, - bool sort) { - CHECK(LayoutUtil::IsSparseArray(shape())); - int rank = shape().rank(); - CHECK_EQ(indices.rank(), rank); - int64 max_elements = LayoutUtil::MaxSparseElements(shape().layout()); - CHECK_LE(indices.max_indices(), max_elements); - int64 num_elements = values.size(); - CHECK_LE(num_elements, max_elements); - CHECK_EQ(num_elements, indices.index_count()); - auto root_data = root_piece().data(); - // Piece::data() returns a Span of size equal to the number of indices - // in the SparseIndexArray. So there is no need to adjust the size of the data - // here. It is enough to just copy the incoming values into the data buffer. 
- std::copy(values.begin(), values.end(), root_data.begin()); - *this->root_piece().sparse_indices() = std::move(indices); - if (sort) { - auto root_data = this->root_piece().data(); - this->root_piece().sparse_indices()->SortWithValues(root_data); - } - DCHECK(this->root_piece().sparse_indices()->Validate(shape())); -} - template Status MutableLiteralBase::PopulateInternal(const FnType& generator, bool parallel) { diff --git a/tensorflow/compiler/xla/literal_test.cc b/tensorflow/compiler/xla/literal_test.cc index 9b17cb762c8..6afbcce40b0 100644 --- a/tensorflow/compiler/xla/literal_test.cc +++ b/tensorflow/compiler/xla/literal_test.cc @@ -252,42 +252,6 @@ TEST_F(LiteralUtilTest, CreateR3FromArray3d) { EXPECT_EQ(expected, result); } -TEST_F(LiteralUtilTest, CreateSparse) { - std::vector dimensions = {8, 8, 8}; - Array2D indices = { - {3, 4, 5}, - {1, 2, 3}, - {2, 3, 4}, - {3, 5, 6}, - }; - std::vector values = {7, 8, 9, 10}; - auto literal = LiteralUtil::CreateSparse( - dimensions, SparseIndexArray(indices.n1() + 3, indices), values); - - Array2D expected_indices = { - {1, 2, 3}, - {2, 3, 4}, - {3, 4, 5}, - {3, 5, 6}, - }; - std::vector expected_values = {8, 9, 7, 10}; - - EXPECT_EQ(literal.sparse_indices()->data(), - absl::Span(expected_indices.data(), - expected_indices.num_elements())); - EXPECT_EQ(literal.data(), absl::Span(expected_values)); - - // Serialize then deserialize and verify the resulting literal. - TF_ASSERT_OK_AND_ASSIGN(Literal literal_from_proto, - Literal::CreateFromProto(literal.ToProto())); - - EXPECT_EQ(literal_from_proto.sparse_indices()->data(), - absl::Span(expected_indices.data(), - expected_indices.num_elements())); - EXPECT_EQ(literal_from_proto.data(), - absl::Span(expected_values)); -} - TEST_F(LiteralUtilTest, LiteralR4F32ProjectedStringifies) { // clang-format off auto literal = LiteralUtil::CreateR4Projected({ @@ -1978,43 +1942,6 @@ TEST_F(LiteralUtilTest, InvalidProtoTooManyTupleElements) { EXPECT_THAT(status.error_message(), HasSubstr("Expected 2 tuple elements")); } -TEST_F(LiteralUtilTest, SortSparseElements) { - auto literal = LiteralUtil::CreateSparse({10, 10, 10}, - SparseIndexArray(10, 3), {}); - literal.AppendSparseElement({2, 3, 4}, 2.0); - literal.AppendSparseElement({3, 4, 5}, 3.0); - literal.AppendSparseElement({1, 2, 3}, 1.0); - literal.SortSparseElements(); - EXPECT_EQ(literal.ToString(), - "f32[10,10,10]{[1, 2, 3]: 1, [2, 3, 4]: 2, [3, 4, 5]: 3}"); -} - -TEST_F(LiteralUtilTest, GetSparseElementAsString) { - std::vector dimensions = {10, 10, 10}; - SparseIndexArray indices(10, {{1, 2, 3}, {2, 3, 4}, {3, 4, 5}}); - - EXPECT_EQ( - LiteralUtil::CreateSparse(dimensions, indices, {true, false, true}) - .GetSparseElementAsString(1), - "false"); - EXPECT_EQ(LiteralUtil::CreateSparse(dimensions, indices, {1, 2, 3}) - .GetSparseElementAsString(1), - absl::StrCat(int64{2})); - EXPECT_EQ( - LiteralUtil::CreateSparse(dimensions, indices, {1.0, 2.0, 3.0}) - .GetSparseElementAsString(1), - absl::StrCat(double{2.0})); - EXPECT_EQ(LiteralUtil::CreateSparse(dimensions, indices, - {half{1.0}, half{2.0}, half{3.0}}) - .GetSparseElementAsString(1), - absl::StrCat(static_cast(half{2.0}))); - EXPECT_EQ(LiteralUtil::CreateSparse( - dimensions, indices, - std::vector{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}) - .GetSparseElementAsString(1), - absl::StrCat("(", float{3.0}, ", ", float{4.0}, ")")); -} - TEST_F(LiteralUtilTest, BroadcastVectorToMatrix0) { Literal literal = LiteralUtil::CreateR1({1, 2}); TF_ASSERT_OK_AND_ASSIGN( @@ -2061,6 +1988,11 @@ 
TEST_F(LiteralUtilTest, GetAsComplex128) { EXPECT_FALSE(c6.GetAsComplex128({}).has_value()); } +TEST_F(LiteralUtilTest, SliceOnBool) { + Literal c1 = LiteralUtil::CreateR1({true, true, false}); + EXPECT_EQ(c1, c1.Slice({0}, {3})); +} + TEST_F(LiteralUtilTest, IsEqualAt) { double val_double = 10.0; int val_integral = 10; diff --git a/tensorflow/compiler/xla/literal_util.h b/tensorflow/compiler/xla/literal_util.h index c4535badafa..b22b71a2ec0 100644 --- a/tensorflow/compiler/xla/literal_util.h +++ b/tensorflow/compiler/xla/literal_util.h @@ -38,7 +38,6 @@ limitations under the License. #include "tensorflow/compiler/xla/literal.h" #include "tensorflow/compiler/xla/primitive_util.h" #include "tensorflow/compiler/xla/shape_util.h" -#include "tensorflow/compiler/xla/sparse_index_array.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/util.h" @@ -102,46 +101,6 @@ class LiteralUtil { values, const Layout& layout); - // Creates a literal with a sparse layout and the given indices and values. - // The shape is initialized from the given dimensions. The minor dimension of - // the indices array must equal the rank of the shape (i.e. size of the - // dimensions array). The major dimension of the indices array must equal the - // number of elements in the values array. The maximum number of elements in - // the array is taken from the max_indices() value of the index array. - // - // XLA assumes that sparse literals are in sorted order for all operations. If - // the `sort` argument is true, then the indices and values will be sorted - // while copying them into the literal. If you have ensured that the indices - // and values are already sorted, then you may set the `sort` argument to - // false to skip the sorting step. - // - // For example: - // - // CreateSparse( - // {12, 12, 12}, - // SparseIndexArray(10, 3, - // Array2D{ - // {0, 1, 2}, - // {3, 4, 5}, - // {6, 7, 8}, - // {9, 10, 11}, - // }), - // {1.0, 2.0 3.0, 4.0}) - // - // This creates an array with shape F64[12,12,12]sparse{10}, that has the - // following non-zero values: - // - // [0, 1, 2]: 1.0 - // [3, 4, 5]: 2.0 - // [6, 7, 8]: 3.0 - // [9, 10, 11]: 4.0 - // - template - static Literal CreateSparse(absl::Span dimensions, - SparseIndexArray indices, - absl::Span values, - bool sort = true); - // Creates a scalar literal value zero of the given primitive type. static Literal Zero(PrimitiveType primitive_type); // Creates a scalar literal value one of the given primitive type. 
@@ -417,21 +376,6 @@ template return CreateR4FromArray4DWithLayout(tmp, layout); } -template -/* static */ Literal LiteralUtil::CreateSparse( - absl::Span dimensions, SparseIndexArray indices, - absl::Span values, bool sort) { - int64 num_elements = values.size(); - int64 rank = dimensions.size(); - CHECK_EQ(num_elements, indices.index_count()); - CHECK_EQ(rank, indices.rank()); - Literal literal(ShapeUtil::MakeShapeWithSparseLayout( - primitive_util::NativeToPrimitiveType(), dimensions, - indices.max_indices())); - literal.PopulateSparse(indices, values, sort); - return literal; -} - template /* static */ Literal LiteralUtil::CreateR4( std::initializer_list Device::GetLocalDeviceState() const { + if (local_device_state_) { + return local_device_state_.get(); + } + return InvalidArgument("Device %s is not a local device.", DebugString()); +} + std::string CpuDevice::DebugString() const { return absl::StrCat("CPU_", id()); } @@ -115,7 +122,7 @@ std::string GpuDevice::DebugString() const { static StatusOr> CreateBFCAllocator( se::Platform* platform, - absl::Span> device_states, + absl::Span> local_devices, LocalClient* client, double memory_fraction, bool preallocate) { CHECK_GT(client->backend().device_count(), 0); std::vector allocators; @@ -148,19 +155,24 @@ static StatusOr> CreateBFCAllocator( /*allow_growth=*/!preallocate, absl::StrCat("GPU_", device_ordinal, "_bfc")); allocators.emplace_back(std::move(gpu_bfc_allocator), - device_states.at(device_ordinal)->compute_stream()); + local_devices.at(device_ordinal) + ->local_device_state() + ->compute_stream()); } return absl::make_unique(platform, std::move(allocators)); } -static std::shared_ptr MakeDevice(const std::string& platform_name, - int id, int local_device_ordinal) { +static std::shared_ptr MakeDevice( + const std::string& platform_name, int id, + std::unique_ptr local_device_state) { if (platform_name == "cpu") { - return std::make_shared(id, local_device_ordinal, platform_name); + return std::make_shared(id, std::move(local_device_state), + platform_name); } else { CHECK_EQ(platform_name, "gpu"); - return std::make_shared(id, local_device_ordinal, platform_name); + return std::make_shared(id, std::move(local_device_state), + platform_name); } } @@ -179,16 +191,15 @@ StatusOr> PyLocalClient::Get( ClientLibrary::GetOrCreateLocalClient(options)); bool gpu_platform = platform_name == "gpu"; - std::vector> device_states; std::vector> devices; bool synchronous_deallocation = platform_name == "cpu"; for (int i = 0; i < client->device_count(); ++i) { se::StreamExecutor* executor = client->backend().stream_executor(i).ValueOrDie(); - device_states.push_back(absl::make_unique( + auto device_state = absl::make_unique( executor, synchronous_deallocation, asynchronous, - /*allow_event_reuse=*/gpu_platform)); - devices.push_back(MakeDevice(platform_name, i, i)); + /*allow_event_reuse=*/gpu_platform); + devices.push_back(MakeDevice(platform_name, i, std::move(device_state))); } std::unique_ptr allocator; @@ -196,7 +207,7 @@ StatusOr> PyLocalClient::Get( if (gpu_platform) { if (allocator_config.kind != AllocatorConfig::Kind::kPlatform) { TF_ASSIGN_OR_RETURN(allocator, - CreateBFCAllocator(platform, device_states, client, + CreateBFCAllocator(platform, devices, client, allocator_config.memory_fraction, allocator_config.preallocate)); } @@ -217,21 +228,18 @@ StatusOr> PyLocalClient::Get( return std::make_shared( platform_name, client, std::move(devices), /*host_id=*/0, - std::move(device_states), std::move(allocator), - 
std::move(host_memory_allocator)); + std::move(allocator), std::move(host_memory_allocator)); } PyLocalClient::PyLocalClient( std::string platform_name, LocalClient* client, std::vector> devices, int host_id, - std::vector> device_states, std::unique_ptr allocator, std::unique_ptr host_memory_allocator) : platform_name_(std::move(platform_name)), client_(client), devices_(std::move(devices)), host_id_(host_id), - device_states_(std::move(device_states)), owned_allocator_(std::move(allocator)), host_memory_allocator_(std::move(host_memory_allocator)), h2d_transfer_pool_(tensorflow::Env::Default(), "py_xla_h2d_transfer", @@ -242,15 +250,16 @@ PyLocalClient::PyLocalClient( allocator_ = client_->backend().memory_allocator(); } - local_devices_.resize(device_states_.size()); for (const std::shared_ptr& device : devices_) { CHECK(id_to_device_.insert({device->id(), device}).second) << "Duplicate device id: " << device->id(); - if (device->local_device_ordinal() != -1) { - int idx = device->local_device_ordinal(); + if (device->local_device_state()) { + int idx = device->local_device_state()->device_ordinal(); + if (idx >= local_devices_.size()) { + local_devices_.resize(idx + 1); + } CHECK(local_devices_[idx] == nullptr) << idx; - CHECK_LT(idx, local_devices_.size()); local_devices_[idx] = device; } } @@ -274,17 +283,19 @@ PyLocalClient::DeserializeExecutable( } Status PyLocalClient::TransferToInfeed(const LiteralSlice& literal, - int device_ordinal) { - TF_RETURN_IF_ERROR( - CheckDeviceOrdinal(device_ordinal, "PyLocalClient::TransferToInfeed")); - return client_->TransferToInfeedLocal(literal, device_ordinal); + std::shared_ptr device) { + TF_ASSIGN_OR_RETURN(LocalDeviceState * local_device, + device->GetLocalDeviceState()); + return client_->TransferToInfeedLocal(literal, + local_device->device_ordinal()); } -StatusOr PyLocalClient::TransferFromOutfeed(const Shape& shape, - int device_ordinal) { - TF_RETURN_IF_ERROR( - CheckDeviceOrdinal(device_ordinal, "PyLocalClient::TransferFromOutfeed")); - return client_->TransferFromOutfeedLocal(shape, device_ordinal); +StatusOr PyLocalClient::TransferFromOutfeed( + const Shape& shape, std::shared_ptr device) { + TF_ASSIGN_OR_RETURN(LocalDeviceState * local_device, + device->GetLocalDeviceState()); + return client_->TransferFromOutfeedLocal(shape, + local_device->device_ordinal()); } StatusOr PyLocalClient::GetDefaultDeviceAssignment( @@ -293,36 +304,26 @@ StatusOr PyLocalClient::GetDefaultDeviceAssignment( num_replicas, /*computation_count=*/1); } -Status PyLocalClient::CheckDeviceOrdinal(int device_ordinal, - absl::string_view caller_name) { - if (device_ordinal < 0 || device_ordinal >= local_device_count()) { - return InvalidArgument( - "%s got bad device_ordinal: %d (num_local_devices=%d)", caller_name, - device_ordinal, local_device_count()); - } - return Status::OK(); -} - /* static */ StatusOr> PyLocalBuffer::FromLiterals( std::vector leaves_literals, const Shape& tuple_shape, std::shared_ptr leaves_reference, - std::shared_ptr client, int device_ordinal) { + std::shared_ptr client, std::shared_ptr device) { tensorflow::profiler::TraceMe traceme("PyLocalBuffer::FromLiterals"); VLOG(1) << "PyLocalBuffer::FromLiterals: shape: " << tuple_shape.ToString() - << " device ordinal: " << device_ordinal; - TF_RETURN_IF_ERROR(client->CheckDeviceOrdinal(device_ordinal, - "PyLocalBuffer::FromLiterals")); - DeviceState* device = &client->device_state(device_ordinal); + << " device: " << device->DebugString(); + TF_ASSIGN_OR_RETURN(LocalDeviceState * 
local_device, + device->GetLocalDeviceState()); TransferManager* transfer_manager = client->client()->backend().transfer_manager(); se::DeviceMemoryAllocator* allocator = client->allocator(); TF_ASSIGN_OR_RETURN( Shape compact_shape, transfer_manager->ChooseCompactLayoutForShape(tuple_shape)); - TF_ASSIGN_OR_RETURN(ScopedShapedBuffer scoped_buffer, - transfer_manager->AllocateScopedShapedBuffer( - compact_shape, allocator, device_ordinal)); + TF_ASSIGN_OR_RETURN( + ScopedShapedBuffer scoped_buffer, + transfer_manager->AllocateScopedShapedBuffer( + compact_shape, allocator, local_device->device_ordinal())); // Make the host to device stream wait for the newly allocated buffer to be // available on the compute stream. We schedule this wait synchronously; while @@ -331,8 +332,9 @@ StatusOr> PyLocalBuffer::FromLiterals( // computations that depend on this transfer being enqueued on the compute // stream. if (!transfer_manager->CanShapedBufferBeAccessedNow( - device->host_to_device_stream()->parent(), scoped_buffer)) { - device->host_to_device_stream()->ThenWaitFor(device->compute_stream()); + local_device->host_to_device_stream()->parent(), scoped_buffer)) { + local_device->host_to_device_stream()->ThenWaitFor( + local_device->compute_stream()); } std::shared_ptr definition_event = @@ -344,16 +346,15 @@ StatusOr> PyLocalBuffer::FromLiterals( // TODO(makro): Use move capture once C++ 14 features are available. auto leaves = std::make_shared>( std::move(leaves_literals)); - auto transfer_h2d = [client, transfer_manager, device, device_ordinal, - device_buffer, compact_shape, leaves, - leaves_reference]() { + auto transfer_h2d = [client, transfer_manager, local_device, device_buffer, + compact_shape, leaves, leaves_reference]() { // This function uses TF_CHECK_OK and ValueOrDie() since we have no way to // report failures from a callback. However, the operations here are // unlikely to fail and not recoverable even if we were to fail: DMAs to // memory that has already been allocated, and a possible Event allocation. ShapedBuffer buffer = device_buffer->AsShapedBuffer(compact_shape); TF_CHECK_OK(transfer_manager->WriteTupleIndexTablesAsync( - device->host_to_device_stream(), buffer)); + local_device->host_to_device_stream(), buffer)); std::vector> staging_buffers; staging_buffers.reserve(leaves->size()); auto it = leaves->begin(); @@ -363,7 +364,7 @@ StatusOr> PyLocalBuffer::FromLiterals( ShapedBuffer leaf( indexed_shape.shape, transfer_manager->HostShapeToDeviceShape(indexed_shape.shape), - client->client()->platform(), device_ordinal); + client->client()->platform(), local_device->device_ordinal()); leaf.buffers().CopySubtreeFrom(buffer.buffers(), indexed_shape.index, {}); // If applicable on the backend, stage the transfer via host memory @@ -379,51 +380,53 @@ StatusOr> PyLocalBuffer::FromLiterals( BorrowingLiteral literal(static_cast(staging_buffer.get()), it->shape()); TF_CHECK_OK(transfer_manager->TransferLiteralToDeviceAsync( - device->host_to_device_stream(), literal, leaf)); + local_device->host_to_device_stream(), literal, leaf)); staging_buffers.push_back(std::move(staging_buffer)); } else { // Otherwise, just transfer the literal. 
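For orientation, a hedged sketch of the new FromLiterals call shape: the caller now passes a Device handle instead of a raw ordinal, and the ordinal is recovered internally through GetLocalDeviceState(). The argument names below are placeholders, and local_devices() is assumed to expose the client's local device list:

```cpp
// Hypothetical call site; 'leaves', 'tuple_shape' and 'keepalive' stand in
// for real arguments prepared by the Python bindings.
std::shared_ptr<Device> device = client->local_devices().front();
auto buffer_or = PyLocalBuffer::FromLiterals(
    std::move(leaves), tuple_shape, std::move(keepalive), client, device);
```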
TF_CHECK_OK(transfer_manager->TransferLiteralToDeviceAsync( - device->host_to_device_stream(), *it, leaf)); + local_device->host_to_device_stream(), *it, leaf)); } ++it; } EventPool::Handle event = - device->event_pool() - .ThenAllocateAndRecordEvent(device->host_to_device_stream()) + local_device->event_pool() + .ThenAllocateAndRecordEvent(local_device->host_to_device_stream()) .ValueOrDie(); // Sets the buffer definition event. Note: this has the side effect of // unblocking any host threads that may have been waiting to consume the // buffer. device_buffer->definition_event()->SetDefinitionEvent( - std::move(event), device->host_to_device_stream()); + std::move(event), local_device->host_to_device_stream()); - if (device->synchronous_deallocation()) { - device->ThenRelease(device->host_to_device_stream(), device_buffer); + if (local_device->synchronous_deallocation()) { + local_device->ThenRelease(local_device->host_to_device_stream(), + device_buffer); } - device->ThenRelease( - device->host_to_device_stream(), + local_device->ThenRelease( + local_device->host_to_device_stream(), std::make_pair(leaves_reference, std::move(staging_buffers))); }; client->h2d_transfer_pool()->Schedule(transfer_h2d); - return absl::make_unique( - compact_shape, std::move(device_buffer), std::move(client)); + return absl::make_unique(compact_shape, + std::move(device_buffer), + std::move(client), std::move(device)); } /* static */ StatusOr> PyLocalBuffer::MakeTuple( const std::vector buffers, - std::shared_ptr client, int device_ordinal) { - TF_RETURN_IF_ERROR( - client->CheckDeviceOrdinal(device_ordinal, "PyLocalBuffer::MakeTuple")); + std::shared_ptr client, std::shared_ptr device) { + TF_ASSIGN_OR_RETURN(LocalDeviceState * local_device, + device->GetLocalDeviceState()); std::vector host_shapes; std::vector> device_buffers; host_shapes.reserve(buffers.size()); device_buffers.reserve(buffers.size()); for (const PyLocalBuffer* buffer : buffers) { - TF_RET_CHECK(buffer->device_ordinal() == device_ordinal); + TF_RET_CHECK(buffer->device().get() == device.get()); std::shared_ptr device_buffer = buffer->DeviceBuffer(); if (!device_buffer) { return InvalidArgument( @@ -436,45 +439,48 @@ StatusOr> PyLocalBuffer::FromLiterals( se::DeviceMemoryAllocator* allocator = client->allocator(); TransferManager* transfer_manager = client->client()->backend().transfer_manager(); - DeviceState& device = client->device_state(device_ordinal); auto definition_event = std::make_shared(); - TF_ASSIGN_OR_RETURN( - std::shared_ptr tuple_buffer, - SharedDeviceBuffer::MakeTuple(device_buffers, transfer_manager, allocator, - device_ordinal, definition_event)); + TF_ASSIGN_OR_RETURN(std::shared_ptr tuple_buffer, + SharedDeviceBuffer::MakeTuple( + device_buffers, transfer_manager, allocator, + local_device->device_ordinal(), definition_event)); auto buffer = absl::make_unique( - ShapeUtil::MakeTupleShape(host_shapes), tuple_buffer, std::move(client)); + ShapeUtil::MakeTupleShape(host_shapes), tuple_buffer, std::move(client), + std::move(device)); // TODO(phawkins): extend TransferManager so we do not need to form a full // ShapedBuffer just to write the root tuple index table. TF_ASSIGN_OR_RETURN(ShapedBuffer shaped_buffer, buffer->AsShapedBuffer()); if (!transfer_manager->CanShapedBufferBeAccessedNow( - device.host_to_device_stream()->parent(), shaped_buffer)) { + local_device->host_to_device_stream()->parent(), shaped_buffer)) { // Wait for the compute stream so that memory allocations are synchronized. 
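MakeTuple follows the same convention: every element buffer must already live on the Device handle passed in, as the TF_RET_CHECK above enforces. A minimal hypothetical call:

```cpp
// 'lhs' and 'rhs' are assumed to be buffers previously created on 'device';
// the result is a tuple buffer resident on that same device.
std::vector<PyLocalBuffer*> elements = {lhs.get(), rhs.get()};
auto tuple_or = PyLocalBuffer::MakeTuple(elements, client, device);
```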
- device.host_to_device_stream()->ThenWaitFor(device.compute_stream()); + local_device->host_to_device_stream()->ThenWaitFor( + local_device->compute_stream()); } TF_RETURN_IF_ERROR(transfer_manager->WriteRootTupleIndexTable( - device.host_to_device_stream(), shaped_buffer)); + local_device->host_to_device_stream(), shaped_buffer)); TF_ASSIGN_OR_RETURN(EventPool::Handle event, - device.event_pool().ThenAllocateAndRecordEvent( - device.host_to_device_stream())); + local_device->event_pool().ThenAllocateAndRecordEvent( + local_device->host_to_device_stream())); definition_event->SetDefinitionEvent(std::move(event), - device.host_to_device_stream()); + local_device->host_to_device_stream()); - if (device.synchronous_deallocation()) { - device.ThenRelease(device.host_to_device_stream(), std::move(tuple_buffer)); + if (local_device->synchronous_deallocation()) { + local_device->ThenRelease(local_device->host_to_device_stream(), + std::move(tuple_buffer)); } return buffer; } PyLocalBuffer::PyLocalBuffer(Shape on_host_shape, std::shared_ptr device_buffer, - std::shared_ptr client) + std::shared_ptr client, + std::shared_ptr device) : client_(std::move(client)), on_host_shape_(std::move(on_host_shape)), - device_ordinal_(device_buffer->device_ordinal()), + device_(std::move(device)), device_buffer_(std::move(device_buffer)) {} void PyLocalBuffer::Delete() { @@ -499,8 +505,7 @@ Status PyLocalBuffer::CopyToHostAsync() { } host_value = host_value_ = std::make_shared(); } - se::Stream* stream = - client_->device_state(device_ordinal_).device_to_host_stream(); + se::Stream* stream = device_->local_device_state()->GetDeviceToHostStream(); WaitForBufferDefinitionEventsOnStream(*device_buffer, stream); host_value->value = std::make_shared(on_host_shape_); TF_ASSIGN_OR_RETURN(ShapedBuffer shaped_buffer, AsShapedBuffer()); @@ -564,36 +569,38 @@ PyLocalBuffer::DestructureTuple() { for (int64 i = 0; i < num_children; ++i) { results.push_back(absl::make_unique( on_host_shape_.tuple_shapes(i), device_buffer_->children().at(i), - client_)); + client_, device_)); } return results; } StatusOr> PyLocalBuffer::CopyToDevice( - int dst_device_ordinal) { + std::shared_ptr dst_device) { tensorflow::profiler::TraceMe traceme("PyLocalBuffer::CopyToDevice"); std::shared_ptr src_device_buffer = DeviceBuffer(); - if (dst_device_ordinal == device_ordinal_) { - return absl::make_unique(on_host_shape_, src_device_buffer, - client_); - } - int transfer_device_ordinal = client_->EnqueueD2DTransfersOnSrcStream() - ? device_ordinal_ - : dst_device_ordinal; - DeviceState& transfer_device = client_->device_state(transfer_device_ordinal); - const DeviceState& dst_device = client_->device_state(dst_device_ordinal); + TF_ASSIGN_OR_RETURN(LocalDeviceState * dst_local_device, + dst_device->GetLocalDeviceState()); - se::Stream* transfer_stream = transfer_device.GetDeviceToDeviceStream(); + if (dst_device.get() == device_.get()) { + return absl::make_unique(on_host_shape_, src_device_buffer, + client_, device_); + } + LocalDeviceState* transfer_local_device = + client_->EnqueueD2DTransfersOnSrcStream() ? 
device_->local_device_state() + : dst_local_device; + + se::Stream* transfer_stream = + transfer_local_device->GetDeviceToDeviceStream(); TransferManager* transfer_manager = client_->client()->backend().transfer_manager(); - TF_ASSIGN_OR_RETURN( - ScopedShapedBuffer dst_buffer, - transfer_manager->AllocateScopedShapedBuffer( - on_host_shape_, client_->allocator(), dst_device_ordinal)); + TF_ASSIGN_OR_RETURN(ScopedShapedBuffer dst_buffer, + transfer_manager->AllocateScopedShapedBuffer( + on_host_shape_, client_->allocator(), + dst_local_device->device_ordinal())); if (!transfer_manager->CanShapedBufferBeAccessedNow( - dst_device.compute_stream()->parent(), dst_buffer)) { - transfer_stream->ThenWaitFor(dst_device.compute_stream()); + dst_local_device->compute_stream()->parent(), dst_buffer)) { + transfer_stream->ThenWaitFor(dst_local_device->compute_stream()); } TF_ASSIGN_OR_RETURN(ShapedBuffer src_buffer, AsShapedBuffer()); @@ -607,37 +614,39 @@ StatusOr> PyLocalBuffer::CopyToDevice( TF_RET_CHECK(input_buffer.size() == output_buffer.size()) << "input: " << input_buffer.size() << " output: " << output_buffer.size(); - TF_RETURN_IF_ERROR(transfer_device.ThenMemcpyDeviceToDevice( - transfer_stream, dst_device.compute_stream(), input_buffer, + TF_RETURN_IF_ERROR(transfer_local_device->ThenMemcpyDeviceToDevice( + transfer_stream, dst_local_device->compute_stream(), input_buffer, output_buffer)); } // We hold on to the `src_device_buffer` until the transfer is finished. - transfer_device.ThenRelease(transfer_stream, std::move(src_device_buffer)); + transfer_local_device->ThenRelease(transfer_stream, + std::move(src_device_buffer)); // Write new tuple buffers. The destination buffers have different addresses, // so we must construct tuple buffers from scratch instead of copying them. if (dst_buffer.on_device_shape().IsTuple()) { TF_RETURN_IF_ERROR(transfer_manager->WriteTupleIndexTablesAsync( - dst_device.host_to_device_stream(), dst_buffer)); + dst_local_device->host_to_device_stream(), dst_buffer)); // We need a single definition event, so make the device to device stream // wait for the stream that wrote the tuple index tables on the destination // device. - transfer_stream->ThenWaitFor(dst_device.host_to_device_stream()); + transfer_stream->ThenWaitFor(dst_local_device->host_to_device_stream()); } auto definition_event = std::make_shared(); TF_ASSIGN_OR_RETURN( EventPool::Handle event, - transfer_device.event_pool().ThenAllocateAndRecordEvent(transfer_stream)); + transfer_local_device->event_pool().ThenAllocateAndRecordEvent( + transfer_stream)); definition_event->SetDefinitionEvent(std::move(event), transfer_stream); std::shared_ptr dst_device_buffer = SharedDeviceBuffer::FromScopedShapedBuffer(std::move(dst_buffer), definition_event); return absl::make_unique( - on_host_shape_, std::move(dst_device_buffer), client_); + on_host_shape_, std::move(dst_device_buffer), client_, dst_device); } Status PyLocalBuffer::BlockHostUntilReady() { @@ -652,7 +661,7 @@ Status PyLocalBuffer::BlockHostUntilReady() { // be an issue, we could either use a separate stream for this purpose, or // poll for the buffer definition events. 
se::Stream* stream = client_->device_state(device_buffer->device_ordinal()) - .device_to_host_stream(); + .GetDeviceToHostStream(); WaitForBufferDefinitionEventsOnStream(*device_buffer, stream); return stream->BlockHostUntilDone(); } @@ -694,7 +703,7 @@ StatusOr> PyLocalExecutable::ExecuteHelper( const int device_id = (*device_assignment_)(replica, 0); std::shared_ptr device = LookupDevice(*client_, device_id); CHECK_EQ(device->host_id(), client_->host_id()); - int device_ordinal = device->local_device_ordinal(); + int device_ordinal = device->local_device_state()->device_ordinal(); tensorflow::profiler::TraceMe traceme("LocalExecutable::Execute"); VLOG(3) << "Replica " << replica << " mapped to device ordinal for execution: " << device_ordinal; @@ -729,7 +738,7 @@ StatusOr> PyLocalExecutable::ExecuteHelper( << " buffer: " << argument_buffers.back().ToString(); } - DeviceState* device_state = &client_->device_state(device_ordinal); + LocalDeviceState* device_state = &client_->device_state(device_ordinal); // The choice of where we wait is arbitrary; the reason for the wait is pacing // to avoid problems such as memory fragmentation and running ahead too far, // not for correctness. Placing it before the executable launch allows the @@ -782,7 +791,7 @@ StatusOr> PyLocalExecutable::ExecuteHelper( device_state->compute_stream(), std::make_tuple(executable_, compute_reservation, device_assignment_)); return absl::make_unique(on_host_shape, std::move(out_buffer), - client_); + client_, device); } StatusOr> PyLocalExecutable::Execute( @@ -833,8 +842,7 @@ PyLocalExecutable::ExecutePerReplica( for (int i = 0; i < num_local_replicas; ++i) { const int replica = local_replicas_[i]; std::shared_ptr device = local_devices_[i]; - const DeviceState& device_state = - client_->device_state(device->local_device_ordinal()); + const LocalDeviceState& device_state = *device->local_device_state(); device_state.execute_thread()->Schedule([&, replica, i] { results[i] = ExecuteHelper(argument_handles[i], replica, run_id); diff --git a/tensorflow/compiler/xla/python/local_client.h b/tensorflow/compiler/xla/python/local_client.h index 3f13f62241f..e0a21ad6f1e 100644 --- a/tensorflow/compiler/xla/python/local_client.h +++ b/tensorflow/compiler/xla/python/local_client.h @@ -27,7 +27,7 @@ limitations under the License. #include "tensorflow/compiler/xla/client/executable_build_options.h" #include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/compiler/xla/client/xla_computation.h" -#include "tensorflow/compiler/xla/python/device_state.h" +#include "tensorflow/compiler/xla/python/local_device_state.h" #include "tensorflow/compiler/xla/python/shared_device_buffer.h" #include "tensorflow/compiler/xla/service/computation_placer.h" #include "tensorflow/compiler/xla/service/shaped_buffer.h" @@ -43,10 +43,10 @@ class PyLocalExecutable; class Device { public: - explicit Device(int id, int local_device_ordinal, + explicit Device(int id, std::unique_ptr local_device_state, absl::string_view platform_name, int host_id = 0) : id_(id), - local_device_ordinal_(local_device_ordinal), + local_device_state_(std::move(local_device_state)), host_id_(host_id), platform_name_(platform_name) {} virtual ~Device() {} @@ -56,13 +56,17 @@ class Device { // hosts' devices. This is the ID that should be used in a DeviceAssignment. int id() const { return id_; } - // If this is a device local to this host, the local index of this device as - // according to the underlying backend. 
Unlike id(), this will always be in - // the range [0, num_local_devices), and can be used with the xla::LocalClient - // and xla::Backend APIs. - // - // -1 if this device is not local to this host. - int local_device_ordinal() const { return local_device_ordinal_; } + // If this is a device local to this host, returns a LocalDeviceState object + // that can be used to manipulate the device. Returns nullptr if the device is + // not local to this host. + LocalDeviceState* local_device_state() const { + return local_device_state_.get(); + } + + // If this is a device local to this host, returns a LocalDeviceState object + // that can be used to manipulate the device. Returns an error if the device + // is not local to this host. + StatusOr GetLocalDeviceState() const; // The ID of this device's host. This is always 0 on single-host platforms. int host_id() const { return host_id_; } @@ -73,7 +77,7 @@ class Device { private: const int id_; - const int local_device_ordinal_; + const std::unique_ptr local_device_state_; const int host_id_; const std::string platform_name_; }; @@ -123,13 +127,14 @@ class PyLocalClient { explicit PyLocalClient( std::string platform_name, LocalClient* client, std::vector> devices, int host_id, - std::vector> device_states, std::unique_ptr allocator, std::unique_ptr host_memory_allocator); virtual ~PyLocalClient() = default; - Status TransferToInfeed(const LiteralSlice& literal, int device_ordinal); - StatusOr TransferFromOutfeed(const Shape& shape, int device_ordinal); + Status TransferToInfeed(const LiteralSlice& literal, + std::shared_ptr device); + StatusOr TransferFromOutfeed(const Shape& shape, + std::shared_ptr device); virtual StatusOr GetDefaultDeviceAssignment( int num_replicas) const; @@ -146,8 +151,8 @@ class PyLocalClient { int host_id() const { return host_id_; } const std::string& platform_name() const { return platform_name_; } - DeviceState& device_state(int device_ordinal) const { - return *device_states_.at(device_ordinal); + LocalDeviceState& device_state(int device_ordinal) const { + return *local_devices_.at(device_ordinal)->local_device_state(); } LocalClient* client() const { return client_; } @@ -178,10 +183,6 @@ class PyLocalClient { const std::string& serialized, std::shared_ptr this_shared) const; - // Returns a bad status containing `caller_name` if `device_ordinal` doesn't - // correspond to a local device. - Status CheckDeviceOrdinal(int device_ordinal, absl::string_view caller_name); - protected: std::string platform_name_; LocalClient* client_; @@ -194,8 +195,6 @@ class PyLocalClient { std::vector> local_devices_; int host_id_; - // Device states local to this host. Indexed by local device ordinal. 
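The accessor pair declared above gives callers a choice between a nullable pointer and a Status-carrying lookup; a sketch of the intended contract:

```cpp
// local_device_state() yields nullptr for devices on other hosts, while
// GetLocalDeviceState() surfaces that case as an InvalidArgument status.
if (LocalDeviceState* state = device->local_device_state()) {
  int ordinal = state->device_ordinal();  // safe: the device is host-local
  (void)ordinal;
} else {
  // Remote device: GetLocalDeviceState() would return an error Status here.
}
```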
- std::vector> device_states_; se::DeviceMemoryAllocator* allocator_; std::unique_ptr owned_allocator_; @@ -219,16 +218,16 @@ class PyLocalBuffer { static StatusOr> FromLiterals( std::vector leaves_literals, const Shape& tuple_shape, std::shared_ptr leaves_reference, - std::shared_ptr client, int device_ordinal); + std::shared_ptr client, std::shared_ptr device); static StatusOr> MakeTuple( const std::vector buffers, - std::shared_ptr client, int device_ordinal); + std::shared_ptr client, std::shared_ptr device); - PyLocalBuffer() = default; PyLocalBuffer(Shape on_host_shape, std::shared_ptr device_buffer, - std::shared_ptr client); + std::shared_ptr client, + std::shared_ptr device); PyLocalBuffer(const PyLocalBuffer&) = delete; PyLocalBuffer(PyLocalBuffer&&) = delete; @@ -236,7 +235,7 @@ class PyLocalBuffer { PyLocalBuffer& operator=(PyLocalBuffer&&) = delete; const Shape& on_host_shape() const { return on_host_shape_; } - int device_ordinal() const { return device_ordinal_; } + std::shared_ptr device() const { return device_; } const std::string& platform_name() const { return client_->platform_name(); } std::shared_ptr client() const { return client_; } @@ -266,8 +265,9 @@ class PyLocalBuffer { // Destructures a tuple-valued PyLocalBuffer into its constituent elements. StatusOr>> DestructureTuple(); - // Copies the buffer to device `dst_device_ordinal`. - StatusOr> CopyToDevice(int dst_device_ordinal); + // Copies the buffer to device `dst_device`. + StatusOr> CopyToDevice( + std::shared_ptr dst_device); // Blocks the host until the buffer's value has been computed and is ready for // immediate use on the device. Useful in particular for timing benchmarks. @@ -276,7 +276,7 @@ class PyLocalBuffer { private: const std::shared_ptr client_; const Shape on_host_shape_; - const int device_ordinal_; + const std::shared_ptr device_; mutable absl::Mutex mu_; std::shared_ptr device_buffer_ GUARDED_BY(mu_); diff --git a/tensorflow/compiler/xla/python/device_state.cc b/tensorflow/compiler/xla/python/local_device_state.cc similarity index 72% rename from tensorflow/compiler/xla/python/device_state.cc rename to tensorflow/compiler/xla/python/local_device_state.cc index 3403d882e92..0373d4b642b 100644 --- a/tensorflow/compiler/xla/python/device_state.cc +++ b/tensorflow/compiler/xla/python/local_device_state.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/xla/python/device_state.h" +#include "tensorflow/compiler/xla/python/local_device_state.h" #include #include @@ -24,20 +24,25 @@ limitations under the License. namespace xla { -DeviceState::DeviceState(se::StreamExecutor* executor, - bool synchronous_deallocation, bool asynchronous, - bool allow_event_reuse) +LocalDeviceState::LocalDeviceState(se::StreamExecutor* executor, + bool synchronous_deallocation, + bool asynchronous, bool allow_event_reuse) : synchronous_deallocation_(synchronous_deallocation), event_pool_(allow_event_reuse), - compute_semaphore_(/*capacity=*/asynchronous ? 32 : 1) { + compute_semaphore_(/*capacity=*/asynchronous ? 
32 : 1), + executor_(executor) { compute_stream_ = absl::make_unique(executor); host_to_device_stream_ = absl::make_unique(executor); - device_to_host_stream_ = absl::make_unique(executor); callback_stream_ = absl::make_unique(executor); compute_stream_->Init(); host_to_device_stream_->Init(); - device_to_host_stream_->Init(); callback_stream_->Init(); + device_to_host_streams_.reserve(kNumDeviceToHostStreams); + for (int i = 0; i < kNumDeviceToHostStreams; ++i) { + auto stream = absl::make_unique(executor); + stream->Init(); + device_to_host_streams_.push_back(std::move(stream)); + } device_to_device_streams_.reserve(kNumDeviceToDeviceStreams); for (int i = 0; i < kNumDeviceToDeviceStreams; ++i) { auto stream = absl::make_unique(executor); @@ -50,14 +55,14 @@ DeviceState::DeviceState(se::StreamExecutor* executor, "py_xla_callback"); } -DeviceState::~DeviceState() { +LocalDeviceState::~LocalDeviceState() { Status status = SynchronizeAllActivity(); if (!status.ok()) { LOG(ERROR) << "Error when closing device: " << status; } } -Status DeviceState::SynchronizeAllActivity() { +Status LocalDeviceState::SynchronizeAllActivity() { Status status; // TODO(phawkins): in theory the call to SynchronizeAllActivity below should // suffice. However on the Host platform SynchronizeAllActivity is a dummy @@ -73,10 +78,9 @@ Status DeviceState::SynchronizeAllActivity() { return status; } -Status DeviceState::ThenMemcpyDeviceToDevice(se::Stream* transfer_stream, - se::Stream* dst_stream, - se::DeviceMemoryBase src_buffer, - se::DeviceMemoryBase dst_buffer) { +Status LocalDeviceState::ThenMemcpyDeviceToDevice( + se::Stream* transfer_stream, se::Stream* dst_stream, + se::DeviceMemoryBase src_buffer, se::DeviceMemoryBase dst_buffer) { // The default implementation simply calls ThenMemcpyD2D, and assumes that // the buffer addresses identify the devices. This does not work // on all platforms; this method is virtual so it can be overridden. @@ -84,14 +88,22 @@ Status DeviceState::ThenMemcpyDeviceToDevice(se::Stream* transfer_stream, return Status::OK(); } -void DeviceState::ThenExecuteOnCallbackThread( +void LocalDeviceState::ThenExecuteOnCallbackThread( se::Stream* stream, std::function callback) const { stream->ThenDoHostCallback([this, callback]() mutable { callback_thread_->Schedule(std::move(callback)); }); } -se::Stream* DeviceState::GetDeviceToDeviceStream() { +se::Stream* LocalDeviceState::GetDeviceToHostStream() { + absl::MutexLock lock(&mu_); + int i = next_device_to_host_stream_; + next_device_to_host_stream_ = + (next_device_to_host_stream_ + 1) % device_to_host_streams_.size(); + return device_to_host_streams_.at(i).get(); +} + +se::Stream* LocalDeviceState::GetDeviceToDeviceStream() { absl::MutexLock lock(&mu_); int i = next_device_to_device_stream_; next_device_to_device_stream_ = diff --git a/tensorflow/compiler/xla/python/device_state.h b/tensorflow/compiler/xla/python/local_device_state.h similarity index 82% rename from tensorflow/compiler/xla/python/device_state.h rename to tensorflow/compiler/xla/python/local_device_state.h index 3772c03fc59..7348b9c59f0 100644 --- a/tensorflow/compiler/xla/python/device_state.h +++ b/tensorflow/compiler/xla/python/local_device_state.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_PYTHON_DEVICE_STATE_H_ -#define TENSORFLOW_COMPILER_XLA_PYTHON_DEVICE_STATE_H_ +#ifndef TENSORFLOW_COMPILER_XLA_PYTHON_LOCAL_DEVICE_STATE_H_ +#define TENSORFLOW_COMPILER_XLA_PYTHON_LOCAL_DEVICE_STATE_H_ #include #include @@ -29,9 +29,9 @@ limitations under the License. namespace xla { // Class that encapsulates state relating to a device (e.g., a GPU) on which we -// can perform computation and transfers. DeviceState objects only exist for -// devices local to this host. -class DeviceState { +// can perform computation and transfers. LocalDeviceState objects only exist +// for devices local to this host. +class LocalDeviceState { public: // If synchronous_deallocation is true, the host must not free buffers until // compute/transfers that use those buffers have completed. For example, this @@ -40,9 +40,12 @@ class DeviceState { // // If asynchronous is false, the host will synchronize to the device after // each execution or transfer. This is intended for debugging only. - DeviceState(se::StreamExecutor* executor, bool synchronous_deallocation, - bool asynchronous, bool allow_event_reuse); - virtual ~DeviceState(); + LocalDeviceState(se::StreamExecutor* executor, bool synchronous_deallocation, + bool asynchronous, bool allow_event_reuse); + virtual ~LocalDeviceState(); + + // StreamExecutor (local) device ordinal. + int device_ordinal() const { return executor_->device_ordinal(); } bool synchronous_deallocation() const { return synchronous_deallocation_; } @@ -52,9 +55,10 @@ class DeviceState { se::Stream* host_to_device_stream() const { return host_to_device_stream_.get(); } - se::Stream* device_to_host_stream() const { - return device_to_host_stream_.get(); - } + + // Returns a device to host stream. Allocates streams in a round-robin fashion + // amongst the available streams. + se::Stream* GetDeviceToHostStream(); // Returns a device to device stream. Allocates streams in a round-robin // fashion amongst the available streams. @@ -104,15 +108,18 @@ class DeviceState { // stream by the host ahead of the device. Semaphore compute_semaphore_; + se::StreamExecutor* executor_; std::unique_ptr compute_stream_; std::unique_ptr host_to_device_stream_; - std::unique_ptr device_to_host_stream_; + std::vector> device_to_host_streams_; std::vector> device_to_device_streams_; - // Number of device-to-device streams to create in the multistream case. + // Number of device-to-host and device-to-device streams. 
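Both stream getters use the same mutex-guarded round-robin over a small fixed pool (the constants below set both pool sizes to 4); the observable effect, sketched:

```cpp
// Successive calls cycle through the pool, so up to four independent
// device-to-host transfers can be in flight on distinct streams.
se::Stream* s0 = local_device->GetDeviceToHostStream();  // stream 0
se::Stream* s1 = local_device->GetDeviceToHostStream();  // stream 1
// ...two more calls return streams 2 and 3, then the sequence wraps to 0.
```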
+ static constexpr int kNumDeviceToHostStreams = 4; static constexpr int kNumDeviceToDeviceStreams = 4; absl::Mutex mu_; + int next_device_to_host_stream_ GUARDED_BY(mu_) = 0; int next_device_to_device_stream_ GUARDED_BY(mu_) = 0; // Callback stream is used for running short host-side callbacks after device @@ -132,4 +139,4 @@ class DeviceState { } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_PYTHON_DEVICE_STATE_H_ +#endif // TENSORFLOW_COMPILER_XLA_PYTHON_LOCAL_DEVICE_STATE_H_ diff --git a/tensorflow/compiler/xla/python/tpu_driver/BUILD b/tensorflow/compiler/xla/python/tpu_driver/BUILD index 99a07c31256..b796fe8c541 100644 --- a/tensorflow/compiler/xla/python/tpu_driver/BUILD +++ b/tensorflow/compiler/xla/python/tpu_driver/BUILD @@ -31,11 +31,6 @@ tf_proto_library_cc( use_grpc_namespace = True, ) -cc_library( - name = "c_api", - hdrs = ["c_api.h"], -) - cc_library( name = "tpu_driver", srcs = [ @@ -66,6 +61,7 @@ cc_library( hdrs = ["grpc_tpu_driver.h"], deps = [ ":tpu_driver", + "//tensorflow:grpc++", "//tensorflow/core/platform:logging", "//tensorflow/compiler/xla:status", "//tensorflow/compiler/xla:util", @@ -77,6 +73,25 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "external_tpu_driver", + srcs = ["external_tpu_driver.cc"], + deps = [ + ":tpu_driver", + "@com_google_absl//absl/strings:str_format", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/core/platform:logging", + "//tensorflow/compiler/xla:status", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto_cc", + "//tensorflow/compiler/xla/service:hlo_proto_cc", + ":tpu_service_proto_cc", + ":tpu_driver_proto_cc", + "//tensorflow/compiler/xla/python/tpu_driver/client:c_api", + ] + external_deps(), + alwayslink = 1, +) + cc_library( name = "recording_tpu_driver", srcs = [ diff --git a/tensorflow/compiler/xla/python/tpu_driver/client/BUILD b/tensorflow/compiler/xla/python/tpu_driver/client/BUILD index d5d492de054..932bee43ffc 100644 --- a/tensorflow/compiler/xla/python/tpu_driver/client/BUILD +++ b/tensorflow/compiler/xla/python/tpu_driver/client/BUILD @@ -19,7 +19,6 @@ cc_library( "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto_cc", "//tensorflow/compiler/xla/client:executable_build_options", - "//tensorflow/compiler/xla/python:device_state", "//tensorflow/compiler/xla/python:local_client", "//tensorflow/compiler/xla/python:semaphore", "//tensorflow/compiler/xla/python/tpu_driver", @@ -76,3 +75,8 @@ py_library( "//third_party/py/numpy", ], ) + +cc_library( + name = "c_api", + hdrs = ["c_api.h"], +) diff --git a/tensorflow/compiler/xla/python/tpu_driver/client/c_api.h b/tensorflow/compiler/xla/python/tpu_driver/client/c_api.h new file mode 100644 index 00000000000..8c967d6e0a1 --- /dev/null +++ b/tensorflow/compiler/xla/python/tpu_driver/client/c_api.h @@ -0,0 +1,227 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_PYTHON_TPU_DRIVER_CLIENT_C_API_H_ +#define TENSORFLOW_COMPILER_XLA_PYTHON_TPU_DRIVER_CLIENT_C_API_H_ + +#include + +#define TPUDRIVER_CAPI_EXPORT __attribute__((visibility("default"))) + +#ifdef __cplusplus +extern "C" { +#endif + +struct TpuDriverFn; + +typedef struct TpuDriver TpuDriver; + +typedef struct TpuEvent TpuEvent; + +typedef struct TpuBufferHandleInternal TpuBufferHandleInternal; + +typedef struct TpuCompiledProgramHandleInternal + TpuCompiledProgramHandleInternal; + +typedef struct TpuLoadedProgramHandleInternal TpuLoadedProgramHandleInternal; +typedef struct HloProtoInternal HloProtoInternal; + +typedef struct TpuBufferHandle { + TpuBufferHandleInternal* internal_handle; + TpuEvent* event; + int64_t size_in_bytes; +} TpuBufferHandle; + +typedef struct TpuCompiledProgramHandle { + TpuCompiledProgramHandleInternal* internal_handle; + TpuEvent* event; +} TpuCompiledProgramHandle; + +typedef struct TpuLoadedProgramHandle { + TpuLoadedProgramHandleInternal* internal_handle; + TpuEvent* event; +} TpuLoadedProgramHandle; + +typedef struct HloProto { + HloProtoInternal* internal_hlo_proto; +} HloProto; + +typedef struct DeviceAssignment { + int replica_count; + int computation_count; +} DeviceAssignment; + +typedef struct TpuStatus { + int32_t code; + char* msg; +} TpuStatus; + +typedef struct CompiledProgramShape { + struct TpuStatus* status; + void* bytes; + int32_t size; +} CompiledProgramShape; + +typedef void(PrototypeTpuDriver_Initialize)(struct TpuDriverFn* driver_fn); +typedef struct TpuDriver*(PrototypeTpuDriver_Open)(const char* worker); +typedef void(PrototypeTpuDriver_Close)(struct TpuDriver* driver); + +// TODO(frankchn): Make this not a hard-coded constant. 
+const int32_t MemoryRegion_HBM = 1; + +typedef struct TpuCompiledProgramHandle*(PrototypeTpuDriver_CompileProgram)( + struct TpuDriver* driver, const struct HloProto hlo_proto, + int32_t num_replicas, int32_t eventc, struct TpuEvent** eventv); + +typedef struct TpuCompiledProgramHandle*( + PrototypeTpuDriver_CompileProgramFromText)(struct TpuDriver* driver, + const char* hlo_text, + int32_t num_replicas, + int32_t eventc, + struct TpuEvent** eventv); + +typedef struct TpuLoadedProgramHandle*(PrototypeTpuDriver_LoadProgram)( + struct TpuDriver* driver, int32_t core_id, + const struct TpuCompiledProgramHandle* compiled_program_handle, + int32_t eventc, struct TpuEvent** eventv); + +typedef struct TpuEvent*(PrototypeTpuDriver_UnloadProgram)( + struct TpuDriver* driver, + struct TpuLoadedProgramHandle* loaded_program_handle, int32_t eventc, + struct TpuEvent** eventv); + +typedef struct TpuEvent*(PrototypeTpuDriver_ExecuteProgram)( + struct TpuDriver* driver, struct TpuLoadedProgramHandle* handle, + int32_t inputc, struct TpuBufferHandle** input_buffer_handle, + int32_t outputc, struct TpuBufferHandle** output_buffer_handle, + struct DeviceAssignment device_assignment, int32_t eventc, + struct TpuEvent** eventv); + +typedef struct TpuBufferHandle*(PrototypeTpuDriver_AllocateTuple)( + struct TpuDriver* driver, int32_t core_id, int32_t memory_region, + int32_t bufferc, struct TpuBufferHandle** buffer_handle, int32_t eventc, + struct TpuEvent** eventv); + +typedef struct TpuBufferHandle*(PrototypeTpuDriver_Allocate)( + struct TpuDriver* driver, int32_t core_id, int32_t memory_region, + int64_t num_bytes, int32_t eventc, struct TpuEvent** eventv); + +typedef struct TpuEvent*(PrototypeTpuDriver_Deallocate)( + struct TpuDriver* driver, struct TpuBufferHandle* buffer_handle, + int32_t eventc, struct TpuEvent** eventv); + +typedef struct TpuEvent*(PrototypeTpuDriver_TransferToDevice)( + struct TpuDriver* driver, const void* src, struct TpuBufferHandle* dst, + int32_t eventc, struct TpuEvent** eventv); + +typedef struct TpuEvent*(PrototypeTpuDriver_TransferFromDevice)( + struct TpuDriver* driver, struct TpuBufferHandle* src, void* dst, + int32_t eventc, struct TpuEvent** eventv); + +typedef struct TpuEvent*(PrototypeTpuDriver_TransferFromDeviceToDevice)( + struct TpuDriver* driver, struct TpuBufferHandle* src, + struct TpuBufferHandle* dst, int32_t eventc, struct TpuEvent** eventv); + +typedef void(PrototypeTpuDriver_CreateDeviceAssignment)(int replica_count, + int computation_count); + +typedef struct CompiledProgramShape*( + PrototypeTpuDriver_GetCompiledProgramShape)( + struct TpuCompiledProgramHandle* handle); + +typedef void(PrototypeTpuDriver_FreeCompiledProgramShape)( + struct CompiledProgramShape* shape); + +typedef void(PrototypeTpuDriver_EventAddCallback)( + struct TpuEvent* event, + void (*callback_fn)(struct TpuStatus*, void* additional_info), + void* additional_info); + +typedef struct TpuStatus*(PrototypeTpuDriver_EventAwait)(struct TpuEvent* event, + int64_t timeout_in_us); + +typedef void(PrototypeTpuDriver_FreeEvent)(struct TpuEvent* event); + +typedef void(PrototypeTpuDriver_FreeStatus)(struct TpuStatus* status); + +typedef const char*(PrototypeTpuDriver_Version)(); + +TPUDRIVER_CAPI_EXPORT extern PrototypeTpuDriver_Initialize TpuDriver_Initialize; +TPUDRIVER_CAPI_EXPORT extern PrototypeTpuDriver_Open TpuDriver_Open; +TPUDRIVER_CAPI_EXPORT extern PrototypeTpuDriver_Close TpuDriver_Close; +TPUDRIVER_CAPI_EXPORT extern PrototypeTpuDriver_CompileProgram + TpuDriver_CompileProgram; 
+TPUDRIVER_CAPI_EXPORT extern PrototypeTpuDriver_CompileProgramFromText + TpuDriver_CompileProgramFromText; +TPUDRIVER_CAPI_EXPORT extern PrototypeTpuDriver_LoadProgram + TpuDriver_LoadProgram; +TPUDRIVER_CAPI_EXPORT extern PrototypeTpuDriver_UnloadProgram + TpuDriver_UnloadProgram; +TPUDRIVER_CAPI_EXPORT extern PrototypeTpuDriver_ExecuteProgram + TpuDriver_ExecuteProgram; +TPUDRIVER_CAPI_EXPORT extern PrototypeTpuDriver_AllocateTuple + TpuDriver_AllocateTuple; +TPUDRIVER_CAPI_EXPORT extern PrototypeTpuDriver_Allocate TpuDriver_Allocate; +TPUDRIVER_CAPI_EXPORT extern PrototypeTpuDriver_Deallocate TpuDriver_Deallocate; +TPUDRIVER_CAPI_EXPORT extern PrototypeTpuDriver_TransferToDevice + TpuDriver_TransferToDevice; +TPUDRIVER_CAPI_EXPORT extern PrototypeTpuDriver_TransferFromDevice + TpuDriver_TransferFromDevice; +TPUDRIVER_CAPI_EXPORT extern PrototypeTpuDriver_TransferFromDeviceToDevice + TpuDriver_TransferFromDeviceToDevice; +TPUDRIVER_CAPI_EXPORT extern PrototypeTpuDriver_GetCompiledProgramShape + TpuDriver_GetCompiledProgramShape; +TPUDRIVER_CAPI_EXPORT extern PrototypeTpuDriver_FreeCompiledProgramShape + TpuDriver_FreeCompiledProgramShape; +TPUDRIVER_CAPI_EXPORT extern PrototypeTpuDriver_EventAddCallback + TpuDriver_EventAddCallback; +TPUDRIVER_CAPI_EXPORT extern PrototypeTpuDriver_EventAwait TpuDriver_EventAwait; +TPUDRIVER_CAPI_EXPORT extern PrototypeTpuDriver_FreeEvent TpuDriver_FreeEvent; +TPUDRIVER_CAPI_EXPORT extern PrototypeTpuDriver_FreeStatus TpuDriver_FreeStatus; +TPUDRIVER_CAPI_EXPORT extern PrototypeTpuDriver_Version TpuDriver_Version; + +#ifdef __cplusplus +} +#endif + +struct TpuDriverFn { + PrototypeTpuDriver_Open* TpuDriver_Open; // NOLINT + PrototypeTpuDriver_Close* TpuDriver_Close; // NOLINT + PrototypeTpuDriver_CompileProgram* TpuDriver_CompileProgram; // NOLINT + PrototypeTpuDriver_CompileProgramFromText* + TpuDriver_CompileProgramFromText; // NOLINT + PrototypeTpuDriver_LoadProgram* TpuDriver_LoadProgram; // NOLINT + PrototypeTpuDriver_UnloadProgram* TpuDriver_UnloadProgram; // NOLINT + PrototypeTpuDriver_ExecuteProgram* TpuDriver_ExecuteProgram; // NOLINT + PrototypeTpuDriver_AllocateTuple* TpuDriver_AllocateTuple; // NOLINT + PrototypeTpuDriver_Allocate* TpuDriver_Allocate; // NOLINT + PrototypeTpuDriver_Deallocate* TpuDriver_Deallocate; // NOLINT + PrototypeTpuDriver_TransferToDevice* TpuDriver_TransferToDevice; // NOLINT + PrototypeTpuDriver_TransferFromDevice* + TpuDriver_TransferFromDevice; // NOLINT + PrototypeTpuDriver_TransferFromDeviceToDevice* + TpuDriver_TransferFromDeviceToDevice; // NOLINT + PrototypeTpuDriver_GetCompiledProgramShape* + TpuDriver_GetCompiledProgramShape; // NOLINT + PrototypeTpuDriver_FreeCompiledProgramShape* + TpuDriver_FreeCompiledProgramShape; // NOLINT + PrototypeTpuDriver_EventAddCallback* TpuDriver_EventAddCallback; // NOLINT + PrototypeTpuDriver_EventAwait* TpuDriver_EventAwait; // NOLINT + PrototypeTpuDriver_FreeEvent* TpuDriver_FreeEvent; // NOLINT + PrototypeTpuDriver_FreeStatus* TpuDriver_FreeStatus; // NOLINT + PrototypeTpuDriver_Version* TpuDriver_Version; // NOLINT +}; + +#endif // TENSORFLOW_COMPILER_XLA_PYTHON_TPU_DRIVER_CLIENT_C_API_H_ diff --git a/tensorflow/compiler/xla/python/tpu_driver/client/c_api_client.c b/tensorflow/compiler/xla/python/tpu_driver/client/c_api_client.c index 70ab4af85fd..5fabc8380a5 100644 --- a/tensorflow/compiler/xla/python/tpu_driver/client/c_api_client.c +++ b/tensorflow/compiler/xla/python/tpu_driver/client/c_api_client.c @@ -13,15 +13,20 @@ See the License for the specific language 
governing permissions and limitations under the License. ==============================================================================*/ +// Before you start, make sure c_api.so, c_api.h, and c_api_client.c are in +// the same working directory. +// +// To compile: gcc -o c_api_client c_api_client.c -ldl -// To run, make sure c_api.so and c_api_client in the same directory, and then -// sudo ./c_api_client +// To run: sudo ./c_api_client #include <dlfcn.h> #include <stdio.h> #include <stdlib.h> -int main(int argc, char** argv) { +#include "c_api.h" + +void* LoadAndInitializeDriver(const char* shared_lib, + struct TpuDriverFn* driver_fn) { void* handle; handle = dlopen("./c_api.so", RTLD_NOW); if (!handle) { @@ -29,21 +34,124 @@ int main(int argc, char** argv) { - const char* (*TpuDriver_Version)(void); - void (*TpuDriver_Initialize)(void); - void (*TpuDriver_Open)(const char* worker); + PrototypeTpuDriver_Initialize* initialize_fn; + *(void**)(&initialize_fn) = dlsym(handle, "TpuDriver_Initialize"); + initialize_fn(driver_fn); - fprintf(stdout, "------ Going to Find Out Version ------\n"); - *(void**)(&TpuDriver_Version) = dlsym(handle, "TpuDriver_Version"); - fprintf(stdout, "TPU Driver Version: %s\n", TpuDriver_Version()); + return handle; +} - fprintf(stdout, "------ Going to Initialize ------\n"); - *(void**)(&TpuDriver_Initialize) = dlsym(handle, "TpuDriver_Initialize"); - TpuDriver_Initialize(); +int main(int argc, char** argv) { + struct TpuDriverFn driver_fn; + void* handle = LoadAndInitializeDriver("./c_api.so", &driver_fn); + + fprintf(stdout, "------ Going to Query Version ------\n"); + fprintf(stdout, "TPU Driver Version: %s\n", driver_fn.TpuDriver_Version()); fprintf(stdout, "------ Going to Open a TPU Driver ------\n"); + struct TpuDriver* driver = driver_fn.TpuDriver_Open("local://"); + + // An example of a simple program that sums two parameters.
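The sample resolves a single entry point, TpuDriver_Initialize, and lets it populate a TpuDriverFn table of function pointers through which every later call dispatches. A condensed sketch of the same idiom (error handling elided):

```cpp
#include <dlfcn.h>

// Condensed form of LoadAndInitializeDriver above; assumes c_api.h is
// included for PrototypeTpuDriver_Initialize and TpuDriverFn.
TpuDriverFn LoadDriver(const char* so_path) {
  void* handle = dlopen(so_path, RTLD_NOW);
  auto* init = reinterpret_cast<PrototypeTpuDriver_Initialize*>(
      dlsym(handle, "TpuDriver_Initialize"));
  TpuDriverFn driver_fn;
  init(&driver_fn);  // the library fills in every function pointer
  return driver_fn;
}
```

Note also the event protocol the rest of the sample relies on: each driver call accepts an (eventc, eventv) list of prerequisite events and returns a handle carrying an event, which is how allocate, transfer, execute, and read-back are chained below, starting from the HLO module that follows.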
+ const char* hlo_module_text = R"(HloModule add_vec_module + ENTRY %add_vec (a: s32[256], b: s32[256]) -> s32[256] { + %a = s32[256] parameter(0) + %b = s32[256] parameter(1) + ROOT %sum = s32[256] add(%a, %b) + } + )"; + + fprintf(stdout, "------ Going to Compile a TPU program ------\n"); + struct TpuCompiledProgramHandle* cph = + driver_fn.TpuDriver_CompileProgramFromText(driver, hlo_module_text, + /*num_replicas=*/1, /*eventc=*/0, /*eventv=*/NULL); + + fprintf(stdout, "------ Going to Load a TPU program ------\n"); + + struct TpuLoadedProgramHandle* lph = + driver_fn.TpuDriver_LoadProgram(driver, /*core_id=*/0, cph, + /*eventc=*/0, /*eventv=*/NULL); + + const int size = 1024; + + fprintf(stdout, "------ Going to Allocate a TPU Buffer ------\n"); + struct TpuBufferHandle* buf_a_handle = + driver_fn.TpuDriver_Allocate(driver, /*core_id=*/0, /*memory_region=*/1, + /*bytes=*/size, /*eventc=*/0, /*eventv=*/NULL); + fprintf(stdout, "------ Going to Allocate a TPU Buffer ------\n"); + struct TpuBufferHandle* buf_b_handle = + driver_fn.TpuDriver_Allocate(driver, /*core_id=*/0, /*memory_region=*/1, + /*bytes=*/size, /*eventc=*/0, /*eventv=*/NULL); + fprintf(stdout, "------ Going to Allocate a TPU Buffer ------\n"); + struct TpuBufferHandle* buf_sum_handle = + driver_fn.TpuDriver_Allocate(driver, /*core_id=*/0, /*memory_region=*/1, + /*bytes=*/size, /*eventc=*/0, /*eventv=*/NULL); + + char a_src[size], b_src[size], sum_src[size]; + for (int i = 0; i < size; ++i) { + a_src[i] = 1; + b_src[i] = 2; + sum_src[i] = 0; + } + + TpuEvent* allocate_buf_a_events[] = {buf_a_handle->event}; + fprintf(stdout, "------ Going to Transfer To Device ------\n"); + struct TpuEvent* transfer_ev1 = + driver_fn.TpuDriver_TransferToDevice(driver, a_src, buf_a_handle, + /*eventc=*/1, /*eventv=*/allocate_buf_a_events); + TpuEvent* allocate_buf_b_events[] = {buf_b_handle->event}; + fprintf(stdout, "------ Going to Transfer To Device ------\n"); + struct TpuEvent* transfer_ev2 = + driver_fn.TpuDriver_TransferToDevice(driver, b_src, buf_b_handle, + /*eventc=*/1, /*eventv=*/allocate_buf_b_events); + + fprintf(stdout, "------ Going to Execute a TPU program ------\n"); + DeviceAssignment device_assignment = {1, 1}; + TpuBufferHandle* input_buffer_handle[] = {buf_a_handle, buf_b_handle}; + TpuBufferHandle* output_buffer_handle[] = {buf_sum_handle}; + TpuEvent* transfer_events[] = {transfer_ev1, transfer_ev2}; + struct TpuEvent* execute_event = + driver_fn.TpuDriver_ExecuteProgram(driver, lph, + /*inputc=*/2, /*input_buffer_handle=*/input_buffer_handle, + /*outputc=*/1, /*output_buffer_handle=*/output_buffer_handle, + device_assignment, + /*eventc=*/2, /*eventv=*/transfer_events); + + fprintf(stdout, "------ Going to Transfer From Device ------\n"); + TpuEvent* execute_events[] = {execute_event}; + struct TpuEvent* transfer_sum_event = + driver_fn.TpuDriver_TransferFromDevice(driver, buf_sum_handle, sum_src, + /*eventc=*/1, /*eventv=*/execute_events); + + TpuStatus* status = driver_fn.TpuDriver_EventAwait(transfer_sum_event, + 10000000); + if (status->code != 0) { + fprintf(stdout, "Transfer Event Await: Code: %d, Message: %s\n", + status->code, status->msg); + } + + fprintf(stdout, "------ Going to Unload a TPU program ------\n"); + struct TpuEvent* unload_program_event = driver_fn.TpuDriver_UnloadProgram( + driver, lph, /*eventc=*/1, /*eventv=*/execute_events); + + fprintf(stdout, "------ Going to Deallocate a TPU Buffer ------\n"); + struct TpuEvent* dealloc_ev1 = driver_fn.TpuDriver_Deallocate(driver, + buf_a_handle,
/*eventc=*/0, /*eventv=*/NULL); + driver_fn.TpuDriver_FreeEvent(dealloc_ev1); + + fprintf(stdout, "------ Going to Deallocate a TPU Buffer ------\n"); + struct TpuEvent* dealloc_ev2 = driver_fn.TpuDriver_Deallocate(driver, + buf_b_handle, /*eventc=*/0, /*eventv=*/NULL); + driver_fn.TpuDriver_FreeEvent(dealloc_ev2); + + fprintf(stdout, "------ Going to Deallocate a TPU Buffer ------\n"); + struct TpuEvent* dealloc_ev3 = driver_fn.TpuDriver_Deallocate(driver, + buf_sum_handle, /*eventc=*/0, /*eventv=*/NULL); + driver_fn.TpuDriver_FreeEvent(dealloc_ev3); + + fprintf(stdout, "sum:\n"); + for (size_t i = 0; i < size; ++i) { + fprintf(stdout, "%d ", sum_src[i]); + } dlclose(handle); exit(EXIT_SUCCESS); diff --git a/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.cc b/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.cc index 2b69239bb7a..48f89b5cf2f 100644 --- a/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.cc +++ b/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.cc @@ -39,10 +39,9 @@ std::string TpuDevice::DebugString() const { } static std::shared_ptr MakeDevice(const std::string& platform_name, - int id, int local_device_ordinal) { + int id) { CHECK_EQ(platform_name, "tpu"); - CHECK_EQ(id, local_device_ordinal); // Every device must be local for now. - return std::make_shared(id, local_device_ordinal, "tpu"); + return std::make_shared(id, /*local_device_state=*/nullptr, "tpu"); } StatusOr> PyTpuClient::Get( @@ -67,7 +66,7 @@ StatusOr> PyTpuClient::Get( LOG(INFO) << "Creating " << num_cores << " TPU device(s)."; devices.reserve(num_cores); for (int i = 0; i < num_cores; ++i) { - devices.push_back(MakeDevice("tpu", i, i)); + devices.push_back(MakeDevice("tpu", i)); } return std::make_shared("tpu", std::move(client), @@ -87,8 +86,8 @@ PyTpuClient::PyTpuClient(std::string platform_name, CHECK(id_to_device_.insert({device->id(), device}).second) << "Duplicate device id: " << device->id(); - if (device->local_device_ordinal() != -1) { - int idx = device->local_device_ordinal(); + if (device->id() != -1) { + int idx = device->id(); CHECK(local_devices_[idx] == nullptr) << idx; CHECK_LT(idx, local_devices_.size()); local_devices_[idx] = device; @@ -509,7 +508,7 @@ PyTpuExecutable::ExecuteResult PyTpuExecutable::ExecuteHelper( const int device_id = device_assignment_(replica, 0); std::shared_ptr device = LookupDevice(*client_, device_id); CHECK_EQ(device->host_id(), client_->host_id()); - int device_ordinal = device->local_device_ordinal(); + int device_ordinal = device->id(); tensorflow::profiler::TraceMe traceme("PyTpuExecutable::Execute"); VLOG(3) << "Replica " << replica << " mapped to device ordinal for execution: " << device_ordinal; @@ -742,7 +741,7 @@ PyTpuExecutable::ExecutePerReplica( const int device_id = (*device_assignment)(replica, 0); std::shared_ptr device = LookupDevice(*client, device_id); CHECK_EQ(device->host_id(), client->host_id()); - int device_ordinal = device->local_device_ordinal(); + int device_ordinal = device->id(); loaded_programs[replica] = client->driver()->LoadProgram( device_ordinal, compiled_program.get(), {}); } diff --git a/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.h b/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.h index 7624a14943f..49d4182b719 100644 --- a/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.h +++ b/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.h @@ -24,7 +24,6 @@ limitations under the License. 
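On the TPU path every device is host-local but carries no StreamExecutor state, so MakeDevice above passes a null LocalDeviceState and the client falls back to device->id() as the driver ordinal. That convention, sketched:

```cpp
// TPU devices carry no LocalDeviceState, so the device id doubles as the
// ordinal handed to the TPU driver (see ExecuteHelper above).
auto device = std::make_shared<TpuDevice>(
    /*id=*/0, /*local_device_state=*/nullptr, "tpu");
int device_ordinal = device->id();
```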
#include "absl/synchronization/notification.h" #include "absl/types/span.h" #include "tensorflow/compiler/xla/client/executable_build_options.h" -#include "tensorflow/compiler/xla/python/device_state.h" #include "tensorflow/compiler/xla/python/local_client.h" #include "tensorflow/compiler/xla/python/tpu_driver/tpu_driver.h" #include "tensorflow/compiler/xla/python/tpu_driver/tpu_driver.pb.h" diff --git a/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client_extension.cc b/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client_extension.cc index 60886416a62..2b7082d40c9 100644 --- a/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client_extension.cc +++ b/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client_extension.cc @@ -96,9 +96,9 @@ PYBIND11_MODULE(tpu_client_extension, m) { std::make_move_iterator(tree.leaves.end())); py::gil_scoped_release gil_release; - return PyTpuBuffer::FromLiterals( - std::move(leaves), tree.shape, std::move(py_buffer_ref), - std::move(client), device->local_device_ordinal()); + return PyTpuBuffer::FromLiterals(std::move(leaves), tree.shape, + std::move(py_buffer_ref), + std::move(client), device->id()); }) .def_static( "from_python", @@ -135,8 +135,8 @@ PYBIND11_MODULE(tpu_client_extension, m) { "Cannot make tuple on device '%s' with '%s' backend", device->DebugString(), client->platform_name()); } - return PyTpuBuffer::MakeTuple( - buffers, client, device->local_device_ordinal()); + return PyTpuBuffer::MakeTuple(buffers, client, + device->id()); }) .def_static("make_tuple", &PyTpuBuffer::MakeTuple) .def("copy_to_device", @@ -144,7 +144,7 @@ PYBIND11_MODULE(tpu_client_extension, m) { CHECK(dst_device != nullptr); GlobalPyRefManager()->CollectGarbage(); py::gil_scoped_release gil_release; - return buffer->CopyToDevice(dst_device->local_device_ordinal()); + return buffer->CopyToDevice(dst_device->id()); }) .def("copy_to_device", [](PyTpuBuffer* buffer, int dst_device_ordinal) { @@ -193,7 +193,7 @@ PYBIND11_MODULE(tpu_client_extension, m) { [](const PyTpuExecutable& executable) { std::vector device_ordinals; for (std::shared_ptr device : executable.local_devices()) { - device_ordinals.push_back(device->local_device_ordinal()); + device_ordinals.push_back(device->id()); } return device_ordinals; }) diff --git a/tensorflow/compiler/xla/python/tpu_driver/external_tpu_driver.cc b/tensorflow/compiler/xla/python/tpu_driver/external_tpu_driver.cc new file mode 100644 index 00000000000..8a8e868b2b8 --- /dev/null +++ b/tensorflow/compiler/xla/python/tpu_driver/external_tpu_driver.cc @@ -0,0 +1,387 @@ +// Copyright 2019 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// ============================================================================== + +#include + +#include "absl/strings/str_format.h" +#include "absl/time/time.h" +#include "tensorflow/compiler/xla/python/tpu_driver/client/c_api.h" +#include "tensorflow/compiler/xla/python/tpu_driver/tpu_driver.h" +#include "tensorflow/compiler/xla/python/tpu_driver/tpu_driver.pb.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/util.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" + +namespace tpu_driver { +namespace { + +class ExternalTpuDriver; + +class ExternalEvent : public Event { + public: + explicit ExternalEvent(::TpuDriverFn* driver_fn, ::TpuEvent* event) + : driver_fn_(driver_fn), event_(event) {} + + ~ExternalEvent() override { driver_fn_->TpuDriver_FreeEvent(event_); } + + xla::Status Await() override { + auto tpu_status = driver_fn_->TpuDriver_EventAwait(event_, -1); + auto ret = xla::Status(tensorflow::error::Code(tpu_status->code), + absl::StrFormat("%s", tpu_status->msg)); + driver_fn_->TpuDriver_FreeStatus(tpu_status); + return ret; + } + + absl::optional AwaitWithTimeout( + absl::Duration duration) override { + auto tpu_status_or = driver_fn_->TpuDriver_EventAwait( + event_, absl::ToInt64Microseconds(duration)); + if (tpu_status_or == nullptr) { + return absl::nullopt; + } else { + auto ret = xla::Status(tensorflow::error::Code(tpu_status_or->code), + absl::StrFormat("%s", tpu_status_or->msg)); + driver_fn_->TpuDriver_FreeStatus(tpu_status_or); + return ret; + } + } + + void AddCallback(std::function callback) override { + // We have to create a new copy of the fn on the heap to make it persist. + std::function* callback_addr = + new std::function(callback); + + // Using the callback_addr instead of capturing because C++11 lambdas with + // variable captures cannot be converted to C function pointers. 
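The heap-allocated std::function plus captureless-lambda trampoline is the standard way to carry a C++ closure across a C ABI; a generic sketch of the idiom, where RegisterCb is a hypothetical stand-in for TpuDriver_EventAddCallback:

```cpp
#include <functional>

// Hypothetical C API standing in for TpuDriver_EventAddCallback: a plain
// function pointer plus an opaque user-data pointer.
extern "C" void RegisterCb(void (*fn)(int code, void* user_data),
                           void* user_data);

using Callback = std::function<void(int)>;

void AddCallback(Callback cb) {
  // Heap-allocate the closure so it outlives this scope.
  auto* heap_cb = new Callback(std::move(cb));
  RegisterCb(
      +[](int code, void* user_data) {  // captureless lambda -> C pointer
        auto* f = static_cast<Callback*>(user_data);
        (*f)(code);
        delete f;  // one-shot: free after the single invocation
      },
      heap_cb);
}
```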
+
+ private:
+  ::TpuDriverFn* driver_fn_;
+  ::TpuEvent* event_;
+
+  friend ExternalTpuDriver;
+};
+
+class ExternalBufferHandle : public BufferHandle {
+ public:
+  explicit ExternalBufferHandle(::TpuDriverFn* driver_fn,
+                                ::TpuBufferHandle* handle)
+      : handle_(handle), event_(new ExternalEvent(driver_fn, handle->event)) {}
+
+  std::shared_ptr<Event> OnReady() override { return event_; }
+
+  int64_t size_in_bytes() override { return handle_->size_in_bytes; }
+
+  absl::optional<xla::ShapeProto> shape() override {
+    LOG(FATAL) << "Unimplemented.";
+    return absl::nullopt;
+  }
+
+ private:
+  ::TpuBufferHandle* handle_;
+  std::shared_ptr<ExternalEvent> event_;
+
+  friend ExternalTpuDriver;
+};
+
+class ExternalCompiledProgramHandle : public CompiledProgramHandle {
+ public:
+  explicit ExternalCompiledProgramHandle(::TpuDriverFn* driver_fn,
+                                         ::TpuCompiledProgramHandle* handle)
+      : handle_(handle),
+        driver_fn_(driver_fn),
+        event_(new ExternalEvent(driver_fn, handle->event)) {}
+
+  std::shared_ptr<Event> OnReady() override { return event_; }
+
+  int64_t size_in_bytes() override {
+    LOG(FATAL) << "Unimplemented.";
+    return 0;
+  }
+
+  xla::Status program_shape(xla::ProgramShapeProto* program_shape) override {
+    struct CompiledProgramShape* shape =
+        driver_fn_->TpuDriver_GetCompiledProgramShape(handle_);
+    program_shape->ParseFromArray(shape->bytes, shape->size);
+
+    auto status = xla::Status(tensorflow::error::Code(shape->status->code),
+                              absl::StrFormat("%s", shape->status->msg));
+    driver_fn_->TpuDriver_FreeCompiledProgramShape(shape);
+
+    return status;
+  }
+
+ private:
+  ::TpuCompiledProgramHandle* handle_;
+  ::TpuDriverFn* driver_fn_;
+  std::shared_ptr<ExternalEvent> event_;
+
+  friend ExternalTpuDriver;
+};
+
+class ExternalLoadedProgramHandle : public LoadedProgramHandle {
+ public:
+  explicit ExternalLoadedProgramHandle(::TpuDriverFn* driver_fn,
+                                       ::TpuLoadedProgramHandle* handle)
+      : handle_(handle), event_(new ExternalEvent(driver_fn, handle->event)) {}
+  std::shared_ptr<Event> OnReady() override { return event_; }
+
+  int64_t size_in_bytes() override {
+    LOG(FATAL) << "Unimplemented.";
+    return 0;
+  }
+
+ private:
+  ::TpuLoadedProgramHandle* handle_;
+  std::shared_ptr<ExternalEvent> event_;
+
+  friend ExternalTpuDriver;
+};
+
+class ExternalTpuDriver : public TpuDriver {
+ public:
+  explicit ExternalTpuDriver(const std::string& so_path) {
+    void* handle;
+    handle = dlopen(so_path.c_str(), RTLD_NOW);
+    if (!handle) {
+      LOG(FATAL) << "Unable to load shared library: " << dlerror();
+    }
+
+    PrototypeTpuDriver_Initialize* initialize_fn;
+    *reinterpret_cast<void**>(&initialize_fn) =
+        dlsym(handle, "TpuDriver_Initialize");
+    initialize_fn(&driver_fn_);
+
+    driver_ = driver_fn_.TpuDriver_Open("local://");
+  }
+
+  ~ExternalTpuDriver() override {}
+
+  void QuerySystemInfo(SystemInfo* system_info) override {
+    LOG(FATAL) << "Unimplemented.";
+  }
+
+  xla::Status Reset() override { LOG(FATAL) << "Unimplemented."; }
+
+  std::unique_ptr<BufferHandle> Allocate(
+      int32_t core_id, MemoryRegion region, int64_t num_bytes,
+      absl::Span<Event* const> wait_for) override {
+    auto tpu_events = MakeEventArray(wait_for);
+    auto bh = absl::make_unique<ExternalBufferHandle>(
+        &driver_fn_,
+        driver_fn_.TpuDriver_Allocate(driver_, core_id, region, num_bytes,
+                                      wait_for.size(), tpu_events));
+    delete[] tpu_events;
+    return bh;
+  }
+
+  std::unique_ptr<BufferHandle> Allocate(
+      int32_t core_id, MemoryRegion region, const xla::ShapeProto& shape,
+      absl::Span<Event* const> wait_for) override {
+    LOG(FATAL) << "Unimplemented.";
+    return nullptr;
+  }
+
+  std::unique_ptr<BufferHandle> AllocateTuple(
+      int32_t core_id, MemoryRegion region,
+      absl::Span<BufferHandle* const> children,
+      absl::Span<Event* const> wait_for) override {
+    LOG(FATAL) << "Unimplemented.";
+    return nullptr;
+  }
+
+  std::shared_ptr<Event> Deallocate(
+      std::unique_ptr<BufferHandle> handle,
+      absl::Span<Event* const> wait_for) override {
+    auto tpu_events = MakeEventArray(wait_for);
+    auto event = std::make_shared<ExternalEvent>(
+        &driver_fn_,
+        driver_fn_.TpuDriver_Deallocate(
+            driver_, static_cast<ExternalBufferHandle*>(handle.get())->handle_,
+            wait_for.size(), tpu_events));
+    delete[] tpu_events;
+    return event;
+  }
+
+  std::shared_ptr<Event> TransferToDevice(
+      const void* src, BufferHandle* dst,
+      absl::Span<Event* const> wait_for) override {
+    auto tpu_events = MakeEventArray(wait_for);
+    auto event = std::make_shared<ExternalEvent>(
+        &driver_fn_,
+        driver_fn_.TpuDriver_TransferToDevice(
+            driver_, src, static_cast<ExternalBufferHandle*>(dst)->handle_,
+            wait_for.size(), tpu_events));
+    delete[] tpu_events;
+    return event;
+  }
+
+  std::shared_ptr<Event> TransferFromDevice(
+      const BufferHandle* src, void* dst,
+      absl::Span<Event* const> wait_for) override {
+    auto tpu_events = MakeEventArray(wait_for);
+    auto event = std::make_shared<ExternalEvent>(
+        &driver_fn_,
+        driver_fn_.TpuDriver_TransferFromDevice(
+            driver_, static_cast<const ExternalBufferHandle*>(src)->handle_,
+            dst, wait_for.size(), tpu_events));
+    delete[] tpu_events;
+    return event;
+  }
+
+  std::shared_ptr<Event> TransferFromDeviceToDevice(
+      const BufferHandle* src, BufferHandle* dst,
+      absl::Span<Event* const> wait_for) override {
+    auto tpu_events = MakeEventArray(wait_for);
+    auto event = std::make_shared<ExternalEvent>(
+        &driver_fn_,
+        driver_fn_.TpuDriver_TransferFromDeviceToDevice(
+            driver_, static_cast<const ExternalBufferHandle*>(src)->handle_,
+            static_cast<ExternalBufferHandle*>(dst)->handle_, wait_for.size(),
+            tpu_events));
+    delete[] tpu_events;
+    return event;
+  }
+
+  std::unique_ptr<CompiledProgramHandle> CompileProgram(
+      const xla::HloProto& source, int32_t num_replicas,
+      absl::Span<Event* const> wait_for) override {
+    auto tpu_events = MakeEventArray(wait_for);
+
+    struct HloProto hlo;
+    hlo.size = source.ByteSizeLong();
+    hlo.bytes = malloc(hlo.size);
+    if (!source.SerializeToArray(hlo.bytes, hlo.size)) {
+      LOG(ERROR) << "Unable to serialize HLO to array.";
+      return nullptr;
+    }
+
+    auto handle = absl::make_unique<ExternalCompiledProgramHandle>(
+        &driver_fn_,
+        driver_fn_.TpuDriver_CompileProgram(driver_, hlo, num_replicas,
+                                            wait_for.size(), tpu_events));
+
+    free(hlo.bytes);
+    delete[] tpu_events;
+    return handle;
+  }
+  std::unique_ptr<LoadedProgramHandle> LoadProgram(
+      int32_t core_id, const CompiledProgramHandle* handle,
+      absl::Span<Event* const> wait_for) override {
+    auto tpu_events = MakeEventArray(wait_for);
+
+    auto loaded_handle = absl::make_unique<ExternalLoadedProgramHandle>(
+        &driver_fn_,
+        driver_fn_.TpuDriver_LoadProgram(
+            driver_, core_id,
+            static_cast<const ExternalCompiledProgramHandle*>(handle)->handle_,
+            wait_for.size(), tpu_events));
+
+    delete[] tpu_events;
+    return loaded_handle;
+  }
+
+  std::shared_ptr<Event> UnloadProgram(
+      std::unique_ptr<LoadedProgramHandle> handle,
+      absl::Span<Event* const> wait_for) override {
+    auto tpu_events = MakeEventArray(wait_for);
+    auto event = std::make_shared<ExternalEvent>(
+        &driver_fn_,
+        driver_fn_.TpuDriver_UnloadProgram(
+            driver_,
+            static_cast<ExternalLoadedProgramHandle*>(handle.get())->handle_,
+            wait_for.size(), tpu_events));
+    delete[] tpu_events;
+    return event;
+  }
+
+  std::shared_ptr<Event> ExecuteProgram(
+      LoadedProgramHandle* program, absl::Span<BufferHandle* const> inputs,
+      absl::Span<BufferHandle* const> outputs,
+      const xla::DeviceAssignmentProto& device_assignment,
+      absl::Span<Event* const> wait_for) override {
+    auto tpu_events = MakeEventArray(wait_for);
+
+    struct DeviceAssignmentProto da_proto;
+    da_proto.size = device_assignment.ByteSizeLong();
+    da_proto.bytes = malloc(da_proto.size);
+    if (!device_assignment.SerializeToArray(da_proto.bytes, da_proto.size)) {
+      LOG(ERROR) << "Unable to serialize device assignment to array.";
+      return nullptr;
+    }
+
+    std::vector<::TpuBufferHandle*> inputv;
+    inputv.reserve(inputs.size());
+    for (int i = 0; i < inputs.size(); i++) {
+      inputv.push_back(
+          static_cast<ExternalBufferHandle*>(inputs[i])->handle_);
+    }
+    std::vector<::TpuBufferHandle*> outputv;
+    outputv.reserve(outputs.size());
+    for (int i = 0; i < outputs.size(); i++) {
+      outputv.push_back(
+          static_cast<ExternalBufferHandle*>(outputs[i])->handle_);
+    }
+
+    auto event = std::make_shared<ExternalEvent>(
+        &driver_fn_,
+        driver_fn_.TpuDriver_ExecuteProgram(
+            driver_,
+            static_cast<ExternalLoadedProgramHandle*>(program)->handle_,
+            inputs.size(), inputv.data(), outputs.size(), outputv.data(),
+            da_proto, wait_for.size(), tpu_events));
+
+    free(da_proto.bytes);
+    return event;
+  }
+
+  std::unique_ptr<TpuLinearizer> GetLinearizer() override { return nullptr; }
+
+ private:
+  ::TpuDriverFn driver_fn_;
+  ::TpuDriver* driver_;
+
+  ::TpuEvent** MakeEventArray(absl::Span<Event* const> wait_for) {
+    if (wait_for.empty()) return nullptr;
+    ::TpuEvent** ret = new ::TpuEvent*[wait_for.size()];
+    for (int i = 0; i < wait_for.size(); i++) {
+      ret[i] = static_cast<ExternalEvent*>(wait_for[i])->event_;
+    }
+    return ret;
+  }
+};
+
+xla::StatusOr<std::unique_ptr<TpuDriver>> RegisterExternalTpuDriver(
+    const TpuDriverConfig& config) {
+  std::string shared_lib = config.worker().substr(strlen("external://"));
+  return xla::StatusOr<std::unique_ptr<TpuDriver>>(
+      absl::make_unique<ExternalTpuDriver>(shared_lib));
+}
+
+REGISTER_TPU_DRIVER("external://", RegisterExternalTpuDriver);
+
+}  // namespace
+}  // namespace tpu_driver
diff --git a/tensorflow/compiler/xla/python/tpu_driver/platform/external/tools.bzl b/tensorflow/compiler/xla/python/tpu_driver/platform/external/tools.bzl
index d2823aeb995..99b07b6c787 100644
--- a/tensorflow/compiler/xla/python/tpu_driver/platform/external/tools.bzl
+++ b/tensorflow/compiler/xla/python/tpu_driver/platform/external/tools.bzl
@@ -33,5 +33,4 @@ def external_deps():
         "@com_google_absl//absl/synchronization",
         "@com_google_absl//absl/time",
         "@com_google_absl//absl/types:span",
-        "//tensorflow:grpc++",
     ]
diff --git a/tensorflow/compiler/xla/python/xla.cc b/tensorflow/compiler/xla/python/xla.cc
index f1776763796..b5eb6fa47da 100644
--- a/tensorflow/compiler/xla/python/xla.cc
+++ b/tensorflow/compiler/xla/python/xla.cc
@@ -142,6 +142,16 @@ Status PyRegisterCustomCallTarget(const std::string& fn_name,
   return Status::OK();
 }
 
+StatusOr<std::shared_ptr<Device>> LookupDeviceOrdinal(
+    PyLocalClient* client, int device_ordinal, absl::string_view caller_name) {
+  if (device_ordinal < 0 || device_ordinal >= client->local_device_count()) {
+    return InvalidArgument(
+        "%s got bad device_ordinal: %d (num_local_devices=%d)", caller_name,
+        device_ordinal, client->local_device_count());
+  }
+  return client->local_devices()[device_ordinal];
+}
+
 }  // namespace
 
 PYBIND11_MODULE(xla_extension, m) {
@@ -381,13 +391,27 @@ PYBIND11_MODULE(xla_extension, m) {
             }
             return result;
           })
+      // TODO(phawkins): delete overload that accepts a device_ordinal after
+      // all callers have been updated to pass a Device.
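That TODO describes a two-overload migration window: the old ordinal-based entry point validates and resolves its argument, then funnels into the new Device-based one, so both share a single implementation. The same shape in a stripped-down sketch (all names illustrative):

```
#include <cstdio>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

struct Device { int id; };

class Client {
 public:
  explicit Client(int n) {
    for (int i = 0; i < n; ++i) {
      devices_.push_back(std::make_shared<Device>(Device{i}));
    }
  }

  // New-style API: the caller passes the Device it already holds.
  void TransferToInfeed(const std::string& literal, std::shared_ptr<Device> d) {
    std::printf("infeed %s to device %d\n", literal.c_str(), d->id);
  }

  // Old-style API, kept during migration: validate the ordinal, resolve it,
  // and delegate, mirroring the LookupDeviceOrdinal helper above.
  void TransferToInfeed(const std::string& literal, int device_ordinal) {
    if (device_ordinal < 0 ||
        device_ordinal >= static_cast<int>(devices_.size())) {
      throw std::invalid_argument("TransferToInfeed got bad device_ordinal " +
                                  std::to_string(device_ordinal));
    }
    TransferToInfeed(literal, devices_[device_ordinal]);
  }

 private:
  std::vector<std::shared_ptr<Device>> devices_;
};

int main() {
  Client client(2);
  client.TransferToInfeed("x", 1);  // resolves ordinal 1, then delegates
  return 0;
}
```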
.def("TransferToInfeed", [](PyLocalClient* client, const LiteralSlice& literal, int device_ordinal) { GlobalPyRefManager()->CollectGarbage(); py::gil_scoped_release gil_release; - return client->TransferToInfeed(literal, device_ordinal); + TF_ASSIGN_OR_RETURN(std::shared_ptr device, + LookupDeviceOrdinal(client, device_ordinal, + "TransferToInfeed")); + return client->TransferToInfeed(literal, device); }) + .def("TransferToInfeed", + [](PyLocalClient* client, const LiteralSlice& literal, + std::shared_ptr device) { + GlobalPyRefManager()->CollectGarbage(); + py::gil_scoped_release gil_release; + return client->TransferToInfeed(literal, device); + }) + // TODO(phawkins): delete overload that accepts a device_ordinal after + // all callers have been updated to pass a Device. .def("TransferFromOutfeed", [](PyLocalClient* client, const Shape& shape, int device_ordinal) -> StatusOr { @@ -395,8 +419,24 @@ PYBIND11_MODULE(xla_extension, m) { std::shared_ptr literal_shared; { py::gil_scoped_release gil_release; - TF_ASSIGN_OR_RETURN(Literal literal, client->TransferFromOutfeed( - shape, device_ordinal)); + TF_ASSIGN_OR_RETURN(std::shared_ptr device, + LookupDeviceOrdinal(client, device_ordinal, + "TransferFromOutfeed")); + TF_ASSIGN_OR_RETURN(Literal literal, + client->TransferFromOutfeed(shape, device)); + literal_shared = std::make_shared(std::move(literal)); + } + return LiteralToPython(std::move(literal_shared)); + }) + .def("TransferFromOutfeed", + [](PyLocalClient* client, const Shape& shape, + std::shared_ptr device) -> StatusOr { + GlobalPyRefManager()->CollectGarbage(); + std::shared_ptr literal_shared; + { + py::gil_scoped_release gil_release; + TF_ASSIGN_OR_RETURN(Literal literal, + client->TransferFromOutfeed(shape, device)); literal_shared = std::make_shared(std::move(literal)); } return LiteralToPython(std::move(literal_shared)); @@ -440,7 +480,7 @@ PYBIND11_MODULE(xla_extension, m) { py::gil_scoped_release gil_release; return PyLocalBuffer::FromLiterals( std::move(leaves), tree.shape, std::move(py_buffer_ref), - std::move(client), device->local_device_ordinal()); + std::move(client), std::move(device)); }) .def_static("make_tuple", [](const std::vector buffers, @@ -454,15 +494,15 @@ PYBIND11_MODULE(xla_extension, m) { "Cannot make tuple on device '%s' with '%s' backend", device->DebugString(), client->platform_name()); } - return PyLocalBuffer::MakeTuple( - buffers, client, device->local_device_ordinal()); + return PyLocalBuffer::MakeTuple(buffers, std::move(client), + std::move(device)); }) .def("copy_to_device", [](PyLocalBuffer* buffer, std::shared_ptr dst_device) { CHECK(dst_device != nullptr); GlobalPyRefManager()->CollectGarbage(); py::gil_scoped_release gil_release; - return buffer->CopyToDevice(dst_device->local_device_ordinal()); + return buffer->CopyToDevice(std::move(dst_device)); }) .def("delete", &PyLocalBuffer::Delete) .def("destructure", &PyLocalBuffer::DestructureTuple) @@ -485,10 +525,7 @@ PYBIND11_MODULE(xla_extension, m) { return LiteralToPython(std::move(literal)); }) .def("shape", &PyLocalBuffer::on_host_shape) - .def("device", - [](PyLocalBuffer* buffer) -> std::shared_ptr { - return buffer->client()->local_devices()[buffer->device_ordinal()]; - }) + .def("device", &PyLocalBuffer::device) .def("platform", &PyLocalBuffer::platform_name) .def("is_deleted", [](const PyLocalBuffer& buffer) { diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py index ec5ca9a4a75..fb56e436aaa 100644 --- 
a/tensorflow/compiler/xla/python/xla_client.py +++ b/tensorflow/compiler/xla/python/xla_client.py @@ -444,7 +444,7 @@ def shape_from_pyval(pyval): return convert(pyval) -def transfer_to_infeed(value, device_ordinal=0): +def transfer_to_infeed(value, device=None): """Transfers the given value into the XLA infeed queue. XLA's infeed queue is a single queue that feeds the "XLA virtual machine" with @@ -454,29 +454,31 @@ def transfer_to_infeed(value, device_ordinal=0): Args: value: the value that the caller would like to enqueue into the XLA infeed queue - device_ordinal: the device to infeed the value to. Each device has a + device: the device to infeed the value to. Each device has a distinct infeed queue. """ # TODO(phawkins): support non-default backends. backend = get_local_backend() - backend.client.TransferToInfeed(value, device_ordinal) + device = device or backend.local_devices()[0] + backend.client.TransferToInfeed(value, device) -def transfer_from_outfeed(shape, device_ordinal=0): - """Transfers a literal of the given shape from `device_ordinal`'s outfeed. +def transfer_from_outfeed(shape, device=None): + """Transfers a literal of the given shape from `device`'s outfeed. Args: shape: The shape of the value to transfer from outfeed. - device_ordinal: The device ordinal to transfer the outfeed value from. Each - device has a distinct outfeed queue.. + device: The device from which to transfer the outfeed value. Each device has + a distinct outfeed queue.. Returns: The literal value that is produced from the outfeed queue. """ # TODO(phawkins): support non-default backends. backend = get_local_backend() + device = device or backend.local_devices()[0] return backend.client.TransferFromOutfeed( - shape.with_major_to_minor_layout_if_absent(), device_ordinal) + shape.with_major_to_minor_layout_if_absent(), device) DeviceAssignment = _xla.DeviceAssignment diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index b4ea4d9e263..9b24a583cd5 100755 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -608,7 +608,6 @@ cc_library( ":hlo", ":hlo_parser", "//tensorflow/compiler/xla:test", - "//tensorflow/compiler/xla/tests:xla_internal_test_main", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", ], @@ -1079,7 +1078,7 @@ cc_library( deps = [ ":compiler", "//tensorflow/core:lib_internal", - "@llvm//:core", + "@llvm-project//llvm:core", ], ) @@ -1844,6 +1843,7 @@ tf_cc_test( ":hlo_creation_utils", ":hlo_parser", ":hlo_pass", + ":hlo_pass_pipeline", ":pattern_matcher", ":pattern_matcher_gmock", ":shape_inference", @@ -1982,6 +1982,7 @@ tf_cc_test( "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:xla_data_proto_cc", "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:lib", "//tensorflow/core:test", ], @@ -2018,6 +2019,7 @@ tf_cc_test( "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", # fixdeps: keep ], ) @@ -2053,6 +2055,7 @@ tf_cc_test( "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", ], ) @@ -2118,6 +2121,7 @@ tf_cc_test( ":while_loop_simplifier", "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla/tests:hlo_test_base", + 
"//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:lib", "//tensorflow/core:test", "@com_google_absl//absl/strings", @@ -2179,6 +2183,7 @@ tf_cc_test( "//tensorflow/compiler/xla:literal", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", ], ) @@ -2207,6 +2212,7 @@ tf_cc_test( ":hlo_parser", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:test_utils", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", # fixdeps: keep ], ) @@ -2236,6 +2242,7 @@ tf_cc_test( "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:test", ], ) @@ -2282,13 +2289,17 @@ cc_library( deps = [ ":dynamic_dimension_inference", ":hlo", + ":hlo_casting_utils", ":hlo_dce", ":hlo_pass", + "//tensorflow/compiler/xla:comparison_util", "//tensorflow/compiler/xla:literal", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto_cc", + "//tensorflow/compiler/xla/client:xla_builder", "//tensorflow/core:lib", "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/container:flat_hash_map", @@ -2319,6 +2330,7 @@ xla_test( "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:test", ], ) @@ -2339,6 +2351,7 @@ tf_cc_test( "//tensorflow/compiler/xla:xla_data_proto_cc", "//tensorflow/compiler/xla/client:xla_builder", "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:test", ], ) @@ -2951,6 +2964,7 @@ tf_cc_test( "//tensorflow/compiler/xla:test_helpers", "//tensorflow/compiler/xla:xla_data_proto_cc", "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:test", ], ) @@ -3309,6 +3323,7 @@ tf_cc_test( "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:test_utils", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:lib", "@com_google_absl//absl/memory", ], @@ -3450,6 +3465,7 @@ tf_cc_test( ":hlo_element_type_converter", ":hlo_matchers", "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", ], ) @@ -3528,8 +3544,8 @@ cc_library( "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", - "@llvm//:core", - "@llvm//:transform_utils", + "@llvm-project//llvm:core", + "@llvm-project//llvm:transform_utils", ], ) @@ -3837,6 +3853,7 @@ tf_cc_test( ":sort_simplifier", "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:test", ], ) @@ -3868,6 +3885,7 @@ tf_cc_test( ":stable_sort_expander", "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:test", ], ) @@ -3959,6 +3977,7 @@ tf_cc_test( 
":while_loop_invariant_code_motion", "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:test", ], ) @@ -3986,6 +4005,7 @@ tf_cc_test( ":while_loop_constant_sinking", "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:test", ], ) @@ -4047,6 +4067,7 @@ tf_cc_test( "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:test_utils", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:test", "@com_google_absl//absl/strings", ], @@ -4095,9 +4116,9 @@ tf_cc_test( "//tensorflow/compiler/xla:window_util", "//tensorflow/compiler/xla:xla_data_proto_cc", "//tensorflow/compiler/xla/tests:verified_hlo_module", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:lib", "//tensorflow/core:test", - "//tensorflow/core:test_main", # fixdeps: keep "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", ], diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc old mode 100755 new mode 100644 index f145b447bef..0225d2d3bd6 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -80,6 +80,68 @@ bool IsAll(const HloInstruction* op, int8 value) { } } +bool IsAnyOperandComplex(const HloInstruction* hlo) { + for (auto operand : hlo->operands()) { + if (ShapeUtil::ElementIsComplex(operand->shape())) { + return true; + } + } + return false; +} + +bool IsPositive(const HloInstruction* hlo, + const AlgebraicSimplifierOptions& options) { + // Utility only handles real types. + if (IsAnyOperandComplex(hlo)) { + return false; + } + switch (hlo->opcode()) { + case HloOpcode::kGetTupleElement: { + const HloInstruction* gte_operand = hlo->operand(0); + switch (gte_operand->opcode()) { + case HloOpcode::kCustomCall: { + const auto& target = gte_operand->custom_call_target(); + return target == + options.get_cudnn_batchnorm_forward_training_metadata() && + hlo->tuple_index() == 2; + } + default: + return false; + } + } + case HloOpcode::kPower: + case HloOpcode::kAbs: + case HloOpcode::kRsqrt: + case HloOpcode::kSqrt: + return IsPositive(hlo->operand(0), options); + + case HloOpcode::kMultiply: { + return hlo->operand(0) == hlo->operand(1) && + IsPositive(hlo->operand(0), options); + } + default: + return false; + } +} + +bool IsNonNegative(const HloInstruction* hlo, + const AlgebraicSimplifierOptions& options) { + // Utility only handles real types. + if (IsAnyOperandComplex(hlo)) { + return false; + } + switch (hlo->opcode()) { + case HloOpcode::kMultiply: { + return hlo->operand(0) == hlo->operand(1); + } + case HloOpcode::kAbs: { + return true; + } + default: + return IsPositive(hlo, options); + } +} + // Checks whether `op` is a floating-point constant or broadcast of a constant // of the form +/- 2^k for some integer k positive, negative, or zero. 
Such // values are interesting because multiplying by a power of 2 just moves the @@ -212,6 +274,8 @@ class AlgebraicSimplifierVisitor : public DfsHloRewriteVisitor { AlgebraicSimplifier* simplifier) : options_(options), simplifier_(simplifier) {} + Status HandleAbs(HloInstruction* abs) override; + Status HandleAdd(HloInstruction* add) override; Status HandleAnd(HloInstruction* logical_and) override; @@ -279,8 +343,15 @@ class AlgebraicSimplifierVisitor : public DfsHloRewriteVisitor { Status HandleReduceWindow(HloInstruction* reduce_window) override; Status HandleReverse(HloInstruction* reverse) override; + + Status HandleRsqrt(HloInstruction* rsqrt) override; + Status HandleSlice(HloInstruction* slice) override; + + Status HandleSqrt(HloInstruction* sqrt) override; + Status HandleDynamicSlice(HloInstruction* dynamic_slice) override; + Status HandleDynamicUpdateSlice( HloInstruction* dynamic_update_slice) override; Status HandleScatter(HloInstruction* scatter) override; @@ -501,6 +572,16 @@ bool AlgebraicSimplifierVisitor::ReplaceInstructionIfSameShape( return true; } +Status AlgebraicSimplifierVisitor::HandleAbs(HloInstruction* abs) { + HloInstruction* abs_operand = abs->mutable_operand(0); + VLOG(10) << "trying transform [Abs(A) => A] " << abs->ToString() + << " Abs operand is: " << abs_operand->ToString(); + if (IsNonNegative(abs->operand(0), options_)) { + return ReplaceInstruction(abs, abs_operand); + } + return Status::OK(); +} + Status AlgebraicSimplifierVisitor::HandleAdd(HloInstruction* add) { HloInstruction *lhs, *rhs; CHECK(Match(add, m::Add(m::Op(&lhs), m::Op(&rhs)))); @@ -2127,24 +2208,24 @@ Status AlgebraicSimplifierVisitor::HandleClamp(HloInstruction* clamp) { Status AlgebraicSimplifierVisitor::HandleMultiply(HloInstruction* multiply) { HloInstruction *lhs, *rhs; CHECK(Match(multiply, m::Multiply(m::Op(&lhs), m::Op(&rhs)))); - // A*1 => A - VLOG(10) << "trying transform [A*1 => A]: " << multiply->ToString(); + // LHS*1 => LHS + VLOG(10) << "trying transform [LHS*1 => LHS]: " << multiply->ToString(); if (IsAll(rhs, 1) && ReplaceInstructionIfSameShape(multiply, lhs)) { return Status::OK(); } - // 1*A => A - VLOG(10) << "trying transform [1*A => A]: " << multiply->ToString(); + // 1*RHS => RHS + VLOG(10) << "trying transform [1*RHS => RHS]: " << multiply->ToString(); if (IsAll(lhs, 1) && ReplaceInstructionIfSameShape(multiply, rhs)) { return Status::OK(); } - // 0*A => 0. Only applies for integral types for correct NaN-handling. + // 0*RHS => 0. Only applies for integral types for correct NaN-handling. 
if (IsAll(lhs, 0) && primitive_util::IsIntegralType(multiply->shape().element_type()) && ReplaceInstructionIfSameShape(multiply, lhs)) { return Status::OK(); } - // A*0 => 0 + // LHS*0 => 0 if (IsAll(rhs, 0) && primitive_util::IsIntegralType(multiply->shape().element_type()) && ReplaceInstructionIfSameShape(multiply, rhs)) { @@ -2174,7 +2255,8 @@ Status AlgebraicSimplifierVisitor::HandleMultiply(HloInstruction* multiply) { product_of_constants)); } - // exp(A) * exp(B) => exp(A+B) + VLOG(10) << "trying to transform exp(LHS) * exp(RHS) => exp(LHS+RHS) " + << multiply->ToString(); if (Match(multiply, m::Multiply(m::Exp(m::Op(&lhs)), m::Exp(m::Op(&rhs))))) { auto add = computation_->AddInstruction(HloInstruction::CreateBinary( multiply->shape(), HloOpcode::kAdd, lhs, rhs)); @@ -2182,6 +2264,18 @@ Status AlgebraicSimplifierVisitor::HandleMultiply(HloInstruction* multiply) { multiply, HloInstruction::CreateUnary(multiply->shape(), HloOpcode::kExp, add)); } + + VLOG(10) << "trying transform [rsqrt(B) * rsqrt(B) => 1/B] " + << multiply->ToString(); + HloInstruction* b; + if (Match(multiply, m::Multiply(m::Rsqrt(m::Op(&b)), m::Rsqrt(m::Op(&b)))) && + IsPositive(b, options_)) { + return ReplaceWithNewInstruction( + multiply, + HloInstruction::CreateBinary(multiply->shape(), HloOpcode::kDivide, + MakeScalarLike(b, 1), b)); + } + return Status::OK(); } @@ -3329,6 +3423,31 @@ Status AlgebraicSimplifierVisitor::HandleSlice(HloInstruction* slice) { return Status::OK(); } +Status AlgebraicSimplifierVisitor::HandleRsqrt(HloInstruction* rsqrt) { + VLOG(10) << "trying transform [rsqrt(Pow(A, -2)) => |A|] " + << rsqrt->ToString(); + HloInstruction* rsqrt_operand = rsqrt->mutable_operand(0); + if (rsqrt_operand->opcode() == HloOpcode::kPower && + IsAll(rsqrt_operand->operand(1), -2) && + IsPositive(rsqrt_operand, options_)) { + return ReplaceWithNewInstruction( + rsqrt, HloInstruction::CreateUnary(rsqrt->shape(), HloOpcode::kAbs, + rsqrt_operand->mutable_operand(0))); + } + + VLOG(10) << "trying transform [rsqrt(Divide(1, A)) => sqrt(A)] " + << rsqrt->ToString(); + if (rsqrt_operand->opcode() == HloOpcode::kDivide && + IsAll(rsqrt_operand->operand(0), 1) && + IsPositive(rsqrt_operand->operand(1), options_)) { + return ReplaceWithNewInstruction( + rsqrt, HloInstruction::CreateUnary(rsqrt->shape(), HloOpcode::kSqrt, + rsqrt_operand->mutable_operand(1))); + } + + return Status::OK(); +} + Status AlgebraicSimplifierVisitor::HandleDynamicSlice( HloInstruction* dynamic_slice) { auto operand = dynamic_slice->mutable_operand(0); @@ -3813,6 +3932,19 @@ Status AlgebraicSimplifierVisitor::HandleSort(HloInstruction* sort) { return Status::OK(); } +Status AlgebraicSimplifierVisitor::HandleSqrt(HloInstruction* sqrt) { + VLOG(10) << "trying transform [sqrt(A*A) => |A|] " << sqrt->ToString(); + HloInstruction* sqrt_operand = sqrt->mutable_operand(0); + if (sqrt_operand->opcode() == HloOpcode::kMultiply && + sqrt_operand->operand(0) == sqrt_operand->operand(1)) { + return ReplaceWithNewInstruction( + sqrt, HloInstruction::CreateUnary( + sqrt_operand->mutable_operand(0)->shape(), HloOpcode::kAbs, + sqrt_operand->mutable_operand(0))); + } + return Status::OK(); +} + namespace { bool OnlyPermutesDegenerateDims(const Shape& shape, absl::Span perm) { diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.h b/tensorflow/compiler/xla/service/algebraic_simplifier.h index 74d8b1d4582..ce364a16134 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.h +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.h 
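The sqrt/rsqrt rewrites added above each rest on a real-arithmetic identity that is only valid under the IsPositive/IsNonNegative preconditions; a quick XLA-independent numeric check of all of them:

```
#include <cassert>
#include <cmath>

int main() {
  for (double a : {-3.0, -0.5, 2.0, 7.0}) {
    // sqrt(A*A) => |A| holds for every real A, since A*A >= 0.
    assert(std::sqrt(a * a) == std::fabs(a));

    const double b = a * a;  // b > 0 here, matching the IsPositive guard.

    // rsqrt(B) * rsqrt(B) => 1/B requires B > 0.
    const double rsqrt_b = 1.0 / std::sqrt(b);
    assert(std::fabs(rsqrt_b * rsqrt_b - 1.0 / b) < 1e-12);

    // rsqrt(pow(A, -2)) => |A| requires pow(A, -2) > 0, i.e. A != 0.
    assert(std::fabs(1.0 / std::sqrt(std::pow(a, -2.0)) - std::fabs(a)) <
           1e-12);

    // rsqrt(1/B) => sqrt(B) requires B > 0.
    assert(std::fabs(1.0 / std::sqrt(1.0 / b) - std::sqrt(b)) < 1e-12);
  }
  return 0;
}
```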
@@ -99,7 +99,27 @@ class AlgebraicSimplifierOptions {
 
   int64 very_small_gather_size() const { return very_small_gather_size_; }
 
+  void set_cudnn_batchnorm_forward_training_metadata(const string& c) {
+    metadata_.cudnn_batchnorm_forward_training_metadata = c;
+  }
+
+  const string& get_cudnn_batchnorm_forward_training_metadata() const {
+    return metadata_.cudnn_batchnorm_forward_training_metadata;
+  }
+
  private:
+  // Metadata struct can be used to store any metadata information encapsulated
+  // with the AlgebraicSimplifierOptions that can be later used in an
+  // AlgebraicSimplifier pass. For example,
+  // cudnn_batchnorm_forward_training_metadata can be used to store the name of
+  // a custom call. If the custom call is
+  // __cudnn$batchNormalizationForwardTraining, the output with index 2 is
+  // guaranteed to be positive. This property is used to recursively determine
+  // whether the operand of an instruction is always positive.
+  struct Metadata {
+    string cudnn_batchnorm_forward_training_metadata{""};
+    Metadata() {}
+  };
   ReshapeIsBitcastCallback reshape_is_bitcast_callback_;
   bool is_layout_sensitive_{false};
   bool enable_dot_strength_reduction_{true};
@@ -107,6 +127,7 @@ class AlgebraicSimplifierOptions {
   bool enable_conv_simplification_{true};
   bool enable_window_reduce_to_reduce_replacement_{true};
   int64 very_small_gather_size_{4};
+  Metadata metadata_;
 };
 
 // A pass which performs algebraic simplifications.
diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
index f37ff5387ee..b4e66eb1ad7 100755
--- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
@@ -31,6 +31,7 @@ limitations under the License.
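The tests appended below exercise this metadata hook end to end. The analysis they depend on is, at its core, a small structural recursion over the expression; modeled in isolation and simplified (no custom-call metadata or power cases):

```
#include <cassert>
#include <vector>

// Simplified model of the IsPositive/IsNonNegative recursion (illustrative):
// an expression is provably positive if it is abs/sqrt/rsqrt of something
// positive, or x*x where both operands are the same positive expression.
struct Expr {
  enum Kind { kParam, kAbs, kSqrt, kRsqrt, kMul } kind;
  std::vector<const Expr*> operands;
};

bool IsPositive(const Expr* e);

bool IsNonNegative(const Expr* e) {
  switch (e->kind) {
    case Expr::kAbs:
      return true;  // |x| >= 0 regardless of x.
    case Expr::kMul:
      return e->operands[0] == e->operands[1];  // x*x >= 0.
    default:
      return IsPositive(e);
  }
}

bool IsPositive(const Expr* e) {
  switch (e->kind) {
    case Expr::kAbs:
    case Expr::kSqrt:
    case Expr::kRsqrt:
      return IsPositive(e->operands[0]);
    case Expr::kMul:
      return e->operands[0] == e->operands[1] && IsPositive(e->operands[0]);
    default:
      return false;  // kParam: sign unknown.
  }
}

int main() {
  Expr p{Expr::kParam, {}};
  Expr sq{Expr::kMul, {&p, &p}};  // p*p: non-negative, not provably positive
  Expr r{Expr::kRsqrt, {&sq}};    // rsqrt(p*p)
  assert(!IsPositive(&p));
  assert(IsNonNegative(&sq) && !IsPositive(&sq));
  assert(!IsPositive(&r));  // conservative: p could be zero
  return 0;
}
```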
#include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/service/hlo_pass_fix.h" +#include "tensorflow/compiler/xla/service/hlo_pass_pipeline.h" #include "tensorflow/compiler/xla/service/pattern_matcher.h" #include "tensorflow/compiler/xla/service/pattern_matcher_gmock.h" #include "tensorflow/compiler/xla/service/shape_inference.h" @@ -5847,5 +5848,243 @@ TEST_F(AlgebraicSimplifierTest, SliceOfConcat) { GmockMatch(m::Parameter(1))); } +TEST_F(AlgebraicSimplifierTest, SqrtOfSelfMultiply) { + const char* kModuleStr = R"( + HloModule m + test { + p0 = f32[32]{0} parameter(0) + m0 = f32[32]{0} multiply(f32[32]{0} p0, f32[32]{0} p0) + ROOT s0 = f32[32]{0} sqrt(f32[32]{0} m0) + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto m, ParseAndReturnVerifiedModule(kModuleStr)); + ASSERT_TRUE(AlgebraicSimplifier(default_options_).Run(m.get()).ValueOrDie()); + EXPECT_THAT(m->entry_computation()->root_instruction(), + GmockMatch(m::Abs(m::Parameter(0)))); +} + +TEST_F(AlgebraicSimplifierTest, RsqrtOfRPower) { + const char* kModuleStr = R"( + HloModule m + test { + p0 = f32[128,32,2,112]{3,2,1,0} parameter(0) + p1 = f32[32]{0} parameter(1) + p2 = f32[32]{0} parameter(2) + c0 = f32[] constant(0.001) + c1 = s64[] constant(1) + custom-call.1 = (f32[128,32,2,112]{3,2,1,0}, f32[32]{0}, f32[32]{0}) custom-call(p0, p1, p2, c0, c1), custom_call_target="__cudnn$batchNormalizationForwardTraining" + get-tuple-element.1 = f32[128,32,2,112]{3,2,1,0} get-tuple-element(custom-call.1), index=0 + get-tuple-element.2 = f32[32]{0} get-tuple-element(custom-call.1), index=1 + get-tuple-element = f32[32]{0} get-tuple-element(custom-call.1), index=2 + c2 = f32[] constant(-2) + broadcast = f32[32]{0} broadcast(f32[] c2), dimensions={} + power = f32[32]{0} power(get-tuple-element, broadcast) + rsqrt = f32[32]{0} rsqrt(f32[32]{0} power) + ROOT tuple = (f32[128,32,2,112]{3,2,1,0}, f32[32]{0}, f32[32]{0}) tuple(get-tuple-element.1, get-tuple-element.2, rsqrt) + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto m, ParseAndReturnVerifiedModule(kModuleStr)); + default_options_.set_cudnn_batchnorm_forward_training_metadata( + "__cudnn$batchNormalizationForwardTraining"); + ASSERT_TRUE(AlgebraicSimplifier(default_options_).Run(m.get()).ValueOrDie()); + // Expect transformation: rsqrt(power(gte.2,-2)) -> abs(gte.2) + EXPECT_EQ(FindInstruction(m.get(), HloOpcode::kPower), nullptr); + EXPECT_EQ(FindInstruction(m.get(), HloOpcode::kRsqrt), nullptr); + auto computation = m->entry_computation(); + auto root = computation->root_instruction(); + EXPECT_EQ(root->opcode(), HloOpcode::kTuple); + EXPECT_EQ(root->operand(2)->opcode(), HloOpcode::kAbs); + EXPECT_EQ(root->operand(2)->operand(0)->opcode(), + HloOpcode::kGetTupleElement); +} + +TEST_F(AlgebraicSimplifierTest, RsqrtDivide) { + const char* kModuleStr = R"( + HloModule m + test { + p0 = f32[128,32,2,112]{3,2,1,0} parameter(0) + p1 = f32[32]{0} parameter(1) + p2 = f32[32]{0} parameter(2) + constant = f32[] constant(0.001) + constant.1 = s64[] constant(1) + custom-call.1 = (f32[128,32,2,112]{3,2,1,0}, f32[32]{0}, f32[32]{0}) custom-call(p0, p1, p2, constant, constant.1), custom_call_target="__cudnn$batchNormalizationForwardTraining" + get-tuple-element.1 = f32[128,32,2,112]{3,2,1,0} get-tuple-element(custom-call.1), index=0 + get-tuple-element.2 = f32[32]{0} get-tuple-element(custom-call.1), index=1 + get-tuple-element = f32[32]{0} get-tuple-element(custom-call.1), index=2 + constant.2 = f32[] constant(1) + broadcast.1 = 
f32[32]{0} broadcast(constant.2), dimensions={} + divide = f32[32]{0} divide(broadcast.1, get-tuple-element) + rsqrt = f32[32]{0} rsqrt(divide) + ROOT tuple = (f32[128,32,2,112]{3,2,1,0}, f32[32]{0}, f32[32]{0}) tuple(get-tuple-element.1, get-tuple-element.2, rsqrt) + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto m, ParseAndReturnVerifiedModule(kModuleStr)); + default_options_.set_cudnn_batchnorm_forward_training_metadata( + "__cudnn$batchNormalizationForwardTraining"); + ASSERT_TRUE(AlgebraicSimplifier(default_options_).Run(m.get()).ValueOrDie()); + // Expect transformation: rsqrt(divide(1,gte.2)) -> sqrt(gte.2) + EXPECT_EQ(FindInstruction(m.get(), HloOpcode::kDivide), nullptr); + EXPECT_EQ(FindInstruction(m.get(), HloOpcode::kRsqrt), nullptr); + auto computation = m->entry_computation(); + auto root = computation->root_instruction(); + EXPECT_EQ(root->opcode(), HloOpcode::kTuple); + EXPECT_EQ(root->operand(2)->opcode(), HloOpcode::kSqrt); + EXPECT_EQ(root->operand(2)->operand(0)->opcode(), + HloOpcode::kGetTupleElement); +} + +TEST_F(AlgebraicSimplifierTest, MultiplySelfRsqrt) { + const char* kModuleStr = R"( + HloModule m + test { + p0 = f32[128,32,2,112]{3,2,1,0} parameter(0) + p1 = f32[32]{0} parameter(1) + p2 = f32[32]{0} parameter(2) + constant = f32[] constant(0.001) + constant.1 = s64[] constant(1) + custom-call.1 = (f32[128,32,2,112]{3,2,1,0}, f32[32]{0}, f32[32]{0}) custom-call(p0, p1, p2, constant, constant.1), custom_call_target="__cudnn$batchNormalizationForwardTraining" + get-tuple-element.1 = f32[128,32,2,112]{3,2,1,0} get-tuple-element(custom-call.1), index=0 + get-tuple-element.2 = f32[32]{0} get-tuple-element(custom-call.1), index=1 + get-tuple-element = f32[32]{0} get-tuple-element(custom-call.1), index=2 + rsqrt = f32[32]{0} rsqrt(get-tuple-element) + multiply = f32[32]{0} multiply(rsqrt, rsqrt) + ROOT tuple = (f32[128,32,2,112]{3,2,1,0}, f32[32]{0}, f32[32]{0}) tuple(get-tuple-element.1, get-tuple-element.2, multiply) + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto m, ParseAndReturnVerifiedModule(kModuleStr)); + default_options_.set_cudnn_batchnorm_forward_training_metadata( + "__cudnn$batchNormalizationForwardTraining"); + ASSERT_TRUE(AlgebraicSimplifier(default_options_).Run(m.get()).ValueOrDie()); + + // Expect transformation: multiply(rsqrt(gte.2), rsqrt(gte.2)) -> divide(1, + // gte.2) + EXPECT_EQ(FindInstruction(m.get(), HloOpcode::kMultiply), nullptr); + EXPECT_EQ(FindInstruction(m.get(), HloOpcode::kRsqrt), nullptr); + + auto computation = m->entry_computation(); + auto root = computation->root_instruction(); + EXPECT_EQ(root->opcode(), HloOpcode::kTuple); + EXPECT_EQ(root->operand(2)->opcode(), HloOpcode::kDivide); + EXPECT_EQ(root->operand(2)->operand(0)->opcode(), HloOpcode::kBroadcast); + EXPECT_EQ(root->operand(2)->operand(1)->opcode(), + HloOpcode::kGetTupleElement); +} + +TEST_F(AlgebraicSimplifierTest, MultiplySelfRsqrt_NegativeTestCase) { + const char* kModuleStr = R"( + HloModule m + test { + p0 = f32[128,32,2,112]{3,2,1,0} parameter(0) + p1 = f32[32]{0} parameter(1) + p2 = f32[32]{0} parameter(2) + constant = f32[] constant(0.001) + constant.1 = s64[] constant(1) + custom-call.1 = (f32[128,32,2,112]{3,2,1,0}, f32[32]{0}, f32[32]{0}) custom-call(p0, p1, p2, constant, constant.1), custom_call_target="__cudnn$batchNormalizationForwardTraining" + get-tuple-element.1 = f32[128,32,2,112]{3,2,1,0} get-tuple-element(custom-call.1), index=0 + get-tuple-element.2 = f32[32]{0} get-tuple-element(custom-call.1), index=1 + get-tuple-element = f32[32]{0} 
get-tuple-element(custom-call.1), index=2 + rsqrt = f32[32]{0} rsqrt(get-tuple-element) + multiply = f32[32]{0} multiply(rsqrt, rsqrt) + ROOT tuple = (f32[128,32,2,112]{3,2,1,0}, f32[32]{0}, f32[32]{0}) tuple(get-tuple-element.1, get-tuple-element.2, multiply) + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto m, ParseAndReturnVerifiedModule(kModuleStr)); + default_options_.set_cudnn_batchnorm_forward_training_metadata( + "__cudnn$batchNormalizationForward"); + ASSERT_FALSE(AlgebraicSimplifier(default_options_).Run(m.get()).ValueOrDie()); + EXPECT_NE(FindInstruction(m.get(), HloOpcode::kMultiply), nullptr); + EXPECT_NE(FindInstruction(m.get(), HloOpcode::kRsqrt), nullptr); + EXPECT_EQ(FindInstruction(m.get(), HloOpcode::kDivide), nullptr); + EXPECT_EQ(FindInstruction(m.get(), HloOpcode::kBroadcast), nullptr); + EXPECT_EQ(m->entry_computation()->root_instruction()->operand(2)->opcode(), + HloOpcode::kMultiply); +} + +TEST_F(AlgebraicSimplifierTest, AbsEliminationBatchnormTraining) { + const char* kModuleStr = R"( + HloModule m + test { + p0 = f32[128,32,2,112]{3,2,1,0} parameter(0) + p1 = f32[32]{0} parameter(1) + p2 = f32[32]{0} parameter(2) + constant = f32[] constant(0.001) + constant.1 = s64[] constant(1) + custom-call.1 = (f32[128,32,2,112]{3,2,1,0}, f32[32]{0}, f32[32]{0}) custom-call(p0, p1, p2, constant, constant.1), custom_call_target="__cudnn$batchNormalizationForwardTraining" + get-tuple-element.1 = f32[128,32,2,112]{3,2,1,0} get-tuple-element(custom-call.1), index=0 + get-tuple-element.2 = f32[32]{0} get-tuple-element(custom-call.1), index=1 + get-tuple-element = f32[32]{0} get-tuple-element(custom-call.1), index=2 + abs = f32[32]{0} abs(get-tuple-element) + ROOT %tuple = (f32[128,32,2,112]{3,2,1,0}, f32[32]{0}, f32[32]{0}) tuple(get-tuple-element.1, get-tuple-element.2, abs) + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto m, ParseAndReturnVerifiedModule(kModuleStr)); + default_options_.set_cudnn_batchnorm_forward_training_metadata( + "__cudnn$batchNormalizationForwardTraining"); + ASSERT_TRUE(AlgebraicSimplifier(default_options_).Run(m.get()).ValueOrDie()); + // Verify that the module doesn't have any abs node. 
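Further down, the ar_crs_combiner.cc change replaces erasing from all_reduce_map_ inside a range-for with the advance-before-erase idiom, so the erased entry can never invalidate the loop iterator. The same pattern in miniature on a std::map:

```
#include <iostream>
#include <map>

int main() {
  std::map<int, int> m{{1, 10}, {2, 20}, {3, 30}};
  for (auto it = m.begin(); it != m.end();) {
    auto copy_it = it++;  // Advance first; copy_it is free to be erased.
    if (copy_it->second % 20 == 0) {
      m.erase(copy_it);  // Erasing by iterator only invalidates copy_it.
    }
  }
  for (const auto& kv : m) {
    std::cout << kv.first << "=" << kv.second << "\n";  // 1=10 and 3=30
  }
  return 0;
}
```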
+ EXPECT_EQ(FindInstruction(m.get(), HloOpcode::kAbs), nullptr); + EXPECT_EQ(m->entry_computation()->root_instruction()->operand(2)->opcode(), + HloOpcode::kGetTupleElement); +} + +TEST_F(AlgebraicSimplifierTest, + AbsEliminationBatchnormTraining_NegativeTestCase) { + const char* kModuleStr = R"( + HloModule m + test { + p0 = f32[128,32,2,112]{3,2,1,0} parameter(0) + p1 = f32[32]{0} parameter(1) + p2 = f32[32]{0} parameter(2) + constant = f32[] constant(0.001) + constant.1 = s64[] constant(1) + custom-call.1 = (f32[128,32,2,112]{3,2,1,0}, f32[32]{0}, f32[32]{0}) custom-call(p0, p1, p2, constant, constant.1), custom_call_target="__cudnn$batchNormalizationForwardTraining" + get-tuple-element.1 = f32[128,32,2,112]{3,2,1,0} get-tuple-element(custom-call.1), index=0 + get-tuple-element.2 = f32[32]{0} get-tuple-element(custom-call.1), index=1 + get-tuple-element = f32[32]{0} get-tuple-element(custom-call.1), index=2 + abs = f32[32]{0} abs(get-tuple-element) + ROOT %tuple = (f32[128,32,2,112]{3,2,1,0}, f32[32]{0}, f32[32]{0}) tuple(get-tuple-element.1, get-tuple-element.2, abs) + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto m, ParseAndReturnVerifiedModule(kModuleStr)); + default_options_.set_cudnn_batchnorm_forward_training_metadata( + "__cudnn$batchNormalizationForwardInference"); + ASSERT_FALSE(AlgebraicSimplifier(default_options_).Run(m.get()).ValueOrDie()); + EXPECT_NE(FindInstruction(m.get(), HloOpcode::kAbs), nullptr); +} + +TEST_F(AlgebraicSimplifierTest, AbsEliminationMultiply) { + const char* kModuleStr = R"( + HloModule m + test { + p = f32[32]{0} parameter(0) + m = f32[32]{0} multiply(p, p) + ROOT a = f32[32]{0} abs(m) + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto m, ParseAndReturnVerifiedModule(kModuleStr)); + ASSERT_TRUE(AlgebraicSimplifier(default_options_).Run(m.get()).ValueOrDie()); + EXPECT_THAT(m->entry_computation()->root_instruction(), + GmockMatch(m::Multiply(m::Parameter(0), m::Parameter(0)))); +} + +TEST_F(AlgebraicSimplifierTest, AbsEliminationPower2) { + const char* kModuleStr = R"( + HloModule m + test { + p0 = f32[32]{0} parameter(0) + c0 = f32[] constant(2) + b0 = f32[32]{0} broadcast(c0), dimensions={} + pow = f32[32]{0} power(p0, b0) + ROOT a = f32[32]{0} abs(pow) + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto m, ParseAndReturnVerifiedModule(kModuleStr)); + ASSERT_TRUE(AlgebraicSimplifier(default_options_).Run(m.get()).ValueOrDie()); + // Pow(A, 2) is transformed to AA. As a result, Abs(Power(A, 2)) is + // transformed to AA. + EXPECT_THAT(m->entry_computation()->root_instruction(), + GmockMatch(m::Multiply(m::Parameter(0), m::Parameter(0)))); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/ar_crs_combiner.cc b/tensorflow/compiler/xla/service/ar_crs_combiner.cc index 06aaad351e6..ec8c391a542 100644 --- a/tensorflow/compiler/xla/service/ar_crs_combiner.cc +++ b/tensorflow/compiler/xla/service/ar_crs_combiner.cc @@ -366,12 +366,13 @@ void ArCrsCombiner::GroupAllReducesById(HloModule* module) { } Status ArCrsCombiner::KeepProvablyEqualInstructionGroupsMPMD() { - for (auto it : all_reduce_map_) { - auto channel_id = it.first; + for (auto it = all_reduce_map_.begin(); it != all_reduce_map_.end();) { + auto copy_it = it++; // Advance `it` before invalidation from erase. + auto channel_id = copy_it->first; VLOG(2) << "KeepProvablyEqualInstructionGroups. 
Checking AllReduce channel id: " << channel_id << "\n"; - auto pairs_vec = it.second; + auto pairs_vec = copy_it->second; TF_RET_CHECK(pairs_vec.size() == num_spatial_partitions_); auto instr_0 = pairs_vec[0].ar; for (int i = 1; i < pairs_vec.size(); ++i) { @@ -381,7 +382,7 @@ Status ArCrsCombiner::KeepProvablyEqualInstructionGroupsMPMD() { absl::flat_hash_map visited_pairs; while (true) { if (!InstructionsComputeSameValue(next_0, next_i, &visited_pairs)) { - all_reduce_map_.erase(channel_id); + all_reduce_map_.erase(copy_it); VLOG(2) << "KeepProvablyEqualInstructionGroups. Erased AllReduce " "channel id: " << channel_id << "\n"; @@ -406,12 +407,13 @@ Status ArCrsCombiner::KeepProvablyEqualInstructionGroupsSPMD( auto replication_analysis, HloReplicationAnalysis::Run(module, /*cross_partition_spmd=*/true)); - for (auto it : all_reduce_map_) { - auto channel_id = it.first; + for (auto it = all_reduce_map_.begin(); it != all_reduce_map_.end();) { + auto copy_it = it++; // Advance `it` before invalidation from erase. + auto channel_id = copy_it->first; VLOG(2) << "KeepProvablyEqualInstructionGroups. Checking AllReduce channel id: " << channel_id << "\n"; - auto pairs_vec = it.second; + auto pairs_vec = copy_it->second; TF_RET_CHECK(pairs_vec.size() == 1); auto instr = pairs_vec[0].ar; auto next = instr->users()[0]; @@ -420,7 +422,7 @@ Status ArCrsCombiner::KeepProvablyEqualInstructionGroupsSPMD( // guarantee that the HLO produces an array. TF_RET_CHECK(next->shape().IsArray()); if (!replication_analysis->HloInstructionIsReplicatedAt(next, {})) { - all_reduce_map_.erase(channel_id); + all_reduce_map_.erase(copy_it); VLOG(2) << "KeepProvablyEqualInstructionGroups. Erased AllReduce " "channel id: " << channel_id << "\n"; diff --git a/tensorflow/compiler/xla/service/convolution_group_converter.cc b/tensorflow/compiler/xla/service/convolution_group_converter.cc index f942d6768df..06bcd773f44 100644 --- a/tensorflow/compiler/xla/service/convolution_group_converter.cc +++ b/tensorflow/compiler/xla/service/convolution_group_converter.cc @@ -218,14 +218,127 @@ Status ConvolutionVisitor::HandleBatchGroupCount(HloInstruction* convolution) { int64 input_batch_dimension = dim_numbers.input_batch_dimension(); int64 output_batch_dimension = dim_numbers.output_batch_dimension(); + const int64 kernel_output_feature_dimension = + dim_numbers.kernel_output_feature_dimension(); int64 output_feature_dimension = dim_numbers.output_feature_dimension(); int64 input_batch = activation->shape().dimensions(input_batch_dimension); + const int64 output_feature = + filter->shape().dimensions(kernel_output_feature_dimension); + + VLOG(2) << "is_cost_viable_ " << is_cost_viable_(convolution); + const bool cost_too_high = !is_cost_viable_(convolution); + + if (output_feature != batch_group_count) { + const int64 group_size = output_feature / batch_group_count; + + VLOG(2) << "Need to insert a spatial dimension in activations and in the " + "kernel to deal with backprop of grouped convolutions " + << " group size " << group_size; + + // Add spatial dimension to the activation, and reshape. + Shape reshaped_activation_shape = activation->shape(); + ShapeUtil::AppendMajorDimension(1, &reshaped_activation_shape); + const int64 new_spatial_dim = + reshaped_activation_shape.dimensions().size() - 1; + + activation = add( + HloInstruction::CreateReshape(reshaped_activation_shape, activation)); + + // Insert new spatial dimension after the output feature dimension on the + // kernel. 
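The kernel reshape described in the comment above splits the output-feature dimension of size output_feature into [batch_group_count, group_size] and shifts every later dimension number up by one. The index bookkeeping, sketched with an illustrative helper (not the XLA API):

```
#include <cstdint>
#include <iostream>
#include <vector>

// Split dims[feature_dim] into [batch_group_count, group_size], keeping all
// other dimensions in place; dimensions after feature_dim shift up by one.
std::vector<int64_t> SplitOutputFeatureDim(const std::vector<int64_t>& dims,
                                           int64_t feature_dim,
                                           int64_t batch_group_count) {
  const int64_t group_size = dims[feature_dim] / batch_group_count;
  std::vector<int64_t> new_dims;
  for (int64_t i = 0; i < static_cast<int64_t>(dims.size()); ++i) {
    if (i == feature_dim) {
      new_dims.push_back(batch_group_count);
      new_dims.push_back(group_size);  // the newly created dimension
    } else {
      new_dims.push_back(dims[i]);
    }
  }
  return new_dims;
}

int main() {
  // e.g. a kernel [H=3, W=3, I=16, O=64] with batch_group_count=8:
  for (int64_t d : SplitOutputFeatureDim({3, 3, 16, 64}, 3, 8)) {
    std::cout << d << " ";  // prints: 3 3 16 8 8
  }
  std::cout << "\n";
  return 0;
}
```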
+ auto dims = filter->shape().dimensions(); + std::vector new_dims; + for (int i = 0; i < dims.size(); i++) { + if (i == kernel_output_feature_dimension) { + new_dims.push_back(batch_group_count); + new_dims.push_back(group_size); + } else { + new_dims.push_back(dims[i]); + } + } + + Shape reshaped_filter_shape = ShapeUtil::MakeShapeWithDescendingLayout( + filter->shape().element_type(), new_dims); + + filter = add(HloInstruction::CreateReshape(reshaped_filter_shape, filter)); + + Shape new_output_shape = convolution->shape(); + ShapeUtil::AppendMajorDimension(1, &new_output_shape); + + // Edit convolution dimension numbers. Note that kernel_input_feature_dim + // now becomes a spatial dimension, and the newly added dimension of size + // 1 is the new kernel_input_feature_dim. + dim_numbers.add_input_spatial_dimensions(new_spatial_dim); + + // Update spatial dimension numbers if they show up after the newly added + // spatial dimension. + for (auto& d : *dim_numbers.mutable_kernel_spatial_dimensions()) { + if (d > kernel_output_feature_dimension) { + ++d; + } + } + + // Same for input feature dimension. + if (dim_numbers.kernel_input_feature_dimension() > + kernel_output_feature_dimension) { + dim_numbers.set_kernel_input_feature_dimension( + dim_numbers.kernel_input_feature_dimension() + 1); + } + + dim_numbers.add_kernel_spatial_dimensions(kernel_output_feature_dimension + + 1); + + dim_numbers.add_output_spatial_dimensions(output_batch_dimension); + + dim_numbers.set_output_batch_dimension(new_spatial_dim); + + // Add window for the new spatial dimension. + Window new_window = convolution->window(); + auto* dim = new_window.add_dimensions(); + dim->set_window_dilation(1); + dim->set_base_dilation(1); + dim->set_stride(1); + dim->set_size(group_size); + dim->set_padding_high(group_size - 1); + dim->set_padding_low(group_size - 1); + dim->set_window_reversal(false); + + auto new_convolution = add(HloInstruction::CreateConvolve( + new_output_shape, activation, filter, /*feature_group_count=*/1, + batch_group_count, new_window, dim_numbers, + convolution->precision_config())); + + VLOG(2) << "New convolution " << new_convolution->ToString(); + + // This reversal is not done via set_window_reversal because GPUs don't + // support it. + auto rev = add(HloInstruction::CreateReverse( + new_output_shape, new_convolution, {output_batch_dimension})); + + // Delete the extra spatial dimension, and reshape. + Shape reshaped_convolution_shape = + ShapeUtil::DeleteDimension(new_spatial_dim, rev->shape()); + auto reshaped_convolution = + HloInstruction::CreateReshape(reshaped_convolution_shape, rev); + + VLOG(2) << "Reshaped convolution " << reshaped_convolution->ToString(); + + TF_RETURN_IF_ERROR(computation_->ReplaceWithNewInstruction( + convolution, std::move(reshaped_convolution))); + + changed_ = true; + + convolution = new_convolution; + dim_numbers = convolution->convolution_dimension_numbers(); + output_batch_dimension = new_spatial_dim; + } + // We are not yet supporting batch_group of sizes greater than 1. TF_RET_CHECK(input_batch == batch_group_count); - if (!is_cost_viable_(convolution) || filter_expansion_) { + if (cost_too_high || filter_expansion_) { // We first obtain the expanded the filter (which is the convolution // output). The batch dimension is the expanded one (which originally // represents kernel input feature dimension). 
We mask the filter to zero @@ -238,11 +351,17 @@ Status ConvolutionVisitor::HandleBatchGroupCount(HloInstruction* convolution) { auto expanded_filter_shape = ExpandedFilterShape( convolution->shape(), batch_group_count, output_batch_dimension); + VLOG(2) << "output_batch_dimension " << output_batch_dimension; + VLOG(2) << "New output shape of convolution " + << expanded_filter_shape.ToString(); + auto new_convolution = add(HloInstruction::CreateConvolve( expanded_filter_shape, activation, filter, /*feature_group_count=*/1, /*batch_group_count=*/1, convolution->window(), dim_numbers, convolution->precision_config())); + VLOG(2) << "Expanded convolution " << new_convolution->ToString(); + auto zero = add(HloInstruction::CreateConstant( LiteralUtil::Zero(expanded_filter_shape.element_type()))); auto zero_filter = @@ -354,6 +473,7 @@ Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) { changed_ = false; return Status::OK(); } + VLOG(2) << "is_cost_viable_ " << is_cost_viable_(convolution); // We want to repeat 'filter' in the 'input_feature_dim' dimension // 'group_count' times. if (!is_cost_viable_(convolution) || filter_expansion_) { diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index bec66aea27f..713f10b146f 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -77,7 +77,6 @@ cc_library( ":buffer_info_util", ":conv_canonicalization", ":cpu_executable", - ":cpu_hlo_support_checker", ":cpu_instruction_fusion", ":cpu_layout_assignment", ":cpu_options", @@ -148,15 +147,15 @@ cc_library( "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", "//tensorflow/core:lib", "//tensorflow/core:stream_executor_no_cuda", - "@llvm//:core", - "@llvm//:mc", - "@llvm//:object", - "@llvm//:support", - "@llvm//:target", - "@llvm//:x86_code_gen", # fixdeps: keep + "@llvm-project//llvm:core", + "@llvm-project//llvm:mc", + "@llvm-project//llvm:object", + "@llvm-project//llvm:support", + "@llvm-project//llvm:target", + "@llvm-project//llvm:x86_code_gen", # fixdeps: keep ] + select({ "//tensorflow:linux_ppc64le": [ - "@llvm//:powerpc_code_gen", # fixdeps: keep + "@llvm-project//llvm:powerpc_code_gen", # fixdeps: keep ], "//conditions:default": [ ], @@ -188,12 +187,12 @@ cc_library( ":runtime_single_threaded_fft", ":runtime_single_threaded_matmul", "@com_google_absl//absl/memory", - "@llvm//:execution_engine", - "@llvm//:core", - "@llvm//:mc", # fixdeps: keep - "@llvm//:orc_jit", - "@llvm//:support", - "@llvm//:target", # fixdeps: keep + "@llvm-project//llvm:execution_engine", + "@llvm-project//llvm:core", + "@llvm-project//llvm:mc", # fixdeps: keep + "@llvm-project//llvm:orc_jit", + "@llvm-project//llvm:support", + "@llvm-project//llvm:target", # fixdeps: keep "//tensorflow/compiler/xla/service:custom_call_target_registry", "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:util", @@ -257,7 +256,7 @@ cc_library( "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/types:span", - "@llvm//:orc_jit", + "@llvm-project//llvm:orc_jit", ], ) @@ -315,10 +314,10 @@ cc_library( "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/types:span", - "@llvm//:code_gen", - "@llvm//:core", - "@llvm//:support", - "@llvm//:target", + "@llvm-project//llvm:code_gen", + "@llvm-project//llvm:core", + "@llvm-project//llvm:support", + "@llvm-project//llvm:target", ], ) @@ -332,8 +331,8 @@ cc_library( 
"//tensorflow/compiler/xla:shape_util", "//tensorflow/core:lib", "@com_google_absl//absl/container:flat_hash_map", - "@llvm//:analysis", - "@llvm//:target", + "@llvm-project//llvm:analysis", + "@llvm-project//llvm:target", ], ) @@ -362,7 +361,7 @@ cc_library( "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:span", - "@llvm//:core", + "@llvm-project//llvm:core", ], ) @@ -378,7 +377,7 @@ cc_library( "//tensorflow/compiler/xla/service/llvm_ir:loop_emitter", "//tensorflow/core:lib", "@com_google_absl//absl/strings:str_format", - "@llvm//:core", + "@llvm-project//llvm:core", ], ) @@ -394,7 +393,7 @@ cc_library( "//tensorflow/compiler/xla/service/llvm_ir:kernel_support_library", "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", "//tensorflow/core:lib", - "@llvm//:core", + "@llvm-project//llvm:core", ], ) @@ -425,7 +424,7 @@ cc_library( "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", "//tensorflow/core:lib", "@com_google_absl//absl/strings", - "@llvm//:core", + "@llvm-project//llvm:core", ], ) @@ -463,13 +462,13 @@ cc_library( "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", "//tensorflow/core:lib", "@com_google_absl//absl/memory", - "@llvm//:analysis", - "@llvm//:core", - "@llvm//:ipo", - "@llvm//:mc", - "@llvm//:object", - "@llvm//:support", - "@llvm//:target", + "@llvm-project//llvm:analysis", + "@llvm-project//llvm:core", + "@llvm-project//llvm:ipo", + "@llvm-project//llvm:mc", + "@llvm-project//llvm:object", + "@llvm-project//llvm:support", + "@llvm-project//llvm:target", ], ) @@ -527,8 +526,8 @@ cc_library( "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", "//tensorflow/compiler/xla/service/llvm_ir:math_ops", "//tensorflow/core:lib", - "@llvm//:core", - "@llvm//:transform_utils", + "@llvm-project//llvm:core", + "@llvm-project//llvm:transform_utils", ], ) @@ -762,7 +761,7 @@ cc_library( "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:window_util", "//tensorflow/compiler/xla/service:hlo", - "@llvm//:core", + "@llvm-project//llvm:core", ], ) @@ -818,6 +817,7 @@ tf_cc_test( "//tensorflow/compiler/xla/service:hlo_matchers", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:test_utils", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:lib", "@com_google_absl//absl/types:span", ], @@ -914,6 +914,7 @@ tf_cc_test( "//tensorflow/compiler/xla/service:hlo_matchers", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:test_utils", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:lib", "//tensorflow/core:test", ], @@ -936,7 +937,7 @@ cc_library( hdrs = ["orc_jit_memory_mapper.h"], deps = [ "//tensorflow/core:lib", - "@llvm//:execution_engine", + "@llvm-project//llvm:execution_engine", ], ) @@ -953,34 +954,8 @@ cc_library( "//tensorflow/core:lib", "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/types:span", - "@llvm//:core", - "@llvm//:support", - ], -) - -cc_library( - name = "cpu_hlo_support_checker", - srcs = ["cpu_hlo_support_checker.cc"], - hdrs = ["cpu_hlo_support_checker.h"], - deps = [ - "//tensorflow/compiler/xla:shape_util", - "//tensorflow/compiler/xla:xla_data_proto_cc", - "//tensorflow/compiler/xla/service:hlo_pass", - "//tensorflow/core:lib", - ], -) - -tf_cc_test( - name = "cpu_hlo_support_checker_test", - srcs = ["cpu_hlo_support_checker_test.cc"], - deps = [ - ":cpu_hlo_support_checker", - "//tensorflow/compiler/xla:shape_util", - 
"//tensorflow/compiler/xla:test", - "//tensorflow/compiler/xla/tests:hlo_test_base", - "//tensorflow/compiler/xla/tests:xla_internal_test_main", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", + "@llvm-project//llvm:core", + "@llvm-project//llvm:support", ], ) @@ -1007,8 +982,8 @@ tf_cc_test( "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", - "@llvm//:core", - "@llvm//:support", - "@llvm//:target", + "@llvm-project//llvm:core", + "@llvm-project//llvm:support", + "@llvm-project//llvm:target", ], ) diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index 6a331ba4f19..a04a39b4461 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -60,7 +60,6 @@ limitations under the License. #include "tensorflow/compiler/xla/service/cpu/compiler_functor.h" #include "tensorflow/compiler/xla/service/cpu/conv_canonicalization.h" #include "tensorflow/compiler/xla/service/cpu/cpu_executable.h" -#include "tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h" #include "tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.h" #include "tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h" #include "tensorflow/compiler/xla/service/cpu/cpu_options.h" @@ -248,7 +247,6 @@ Status CpuCompiler::RunHloPassesThroughLayoutAssn( pipeline.AddPass(); pipeline.AddPass(); - pipeline.AddPass(); pipeline.AddPass(); pipeline.AddPass(); diff --git a/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.cc b/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.cc deleted file mode 100644 index 4ac61f44d9f..00000000000 --- a/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.cc +++ /dev/null @@ -1,46 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h" - -#include "tensorflow/compiler/xla/layout_util.h" -#include "tensorflow/compiler/xla/shape_util.h" -#include "tensorflow/core/lib/core/errors.h" - -namespace xla { - -StatusOr CpuHloSupportChecker::Run(HloModule* module) { - for (auto* computation : module->computations()) { - for (const auto& instruction : computation->instructions()) { - TF_RETURN_IF_ERROR( - ShapeUtil::ValidateShapeWithOptionalLayout(instruction->shape())); - TF_RETURN_IF_ERROR(ShapeUtil::ForEachSubshapeWithStatus( - instruction->shape(), - [&instruction](const Shape& subshape, const ShapeIndex&) { - if (LayoutUtil::IsSparseArray(subshape)) { - return xla::Unimplemented( - "CPU backend does not support HLO instruction %s with shape " - "containing a sparse layout: %s", - instruction->ToString(), - ShapeUtil::HumanStringWithLayout(instruction->shape())); - } - return Status::OK(); - })); - } - } - return false; -} - -} // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h b/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h deleted file mode 100644 index a39a9d47246..00000000000 --- a/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h +++ /dev/null @@ -1,40 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_HLO_SUPPORT_CHECKER_H_ -#define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_HLO_SUPPORT_CHECKER_H_ - -#include "tensorflow/compiler/xla/service/hlo_pass_interface.h" - -namespace xla { - -// This pass should run early in the HLO pipeline and checks for HLO constructs -// which are not supported by the CPU backend and cannot be removed via HLO -// transformations (eg, sparse layouts). -class CpuHloSupportChecker : public HloModulePass { - public: - CpuHloSupportChecker() = default; - ~CpuHloSupportChecker() override = default; - - absl::string_view name() const override { return "cpu_hlo_support_checker"; } - - // Note: always returns false (no instructions are ever modified by this - // pass). - StatusOr Run(HloModule* module) override; -}; - -} // namespace xla - -#endif // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_HLO_SUPPORT_CHECKER_H_ diff --git a/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker_test.cc deleted file mode 100644 index 7a905928e6d..00000000000 --- a/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker_test.cc +++ /dev/null @@ -1,76 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h" - -#include "tensorflow/compiler/xla/shape_util.h" -#include "tensorflow/compiler/xla/test.h" -#include "tensorflow/compiler/xla/tests/hlo_test_base.h" -#include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/protobuf/error_codes.pb.h" - -namespace xla { -namespace { - -using ::testing::HasSubstr; - -class CpuHloSupportCheckerTest : public HloTestBase { - protected: - CpuHloSupportChecker& checker() { return checker_; } - - private: - CpuHloSupportChecker checker_; -}; - -TEST_F(CpuHloSupportCheckerTest, Add) { - HloComputation::Builder builder(TestName()); - const Shape scalar_shape = ShapeUtil::MakeShape(F32, {}); - HloInstruction* param0 = builder.AddInstruction( - HloInstruction::CreateParameter(0, scalar_shape, "param0")); - HloInstruction* param1 = builder.AddInstruction( - HloInstruction::CreateParameter(1, scalar_shape, "param1")); - builder.AddInstruction(HloInstruction::CreateBinary( - scalar_shape, HloOpcode::kAdd, param0, param1)); - auto module = CreateNewVerifiedModule(); - module->AddEntryComputation(builder.Build()); - - TF_ASSERT_OK(checker().Run(module.get()).status()); -} - -TEST_F(CpuHloSupportCheckerTest, SparseUnimplemented) { - HloComputation::Builder builder(TestName()); - const Shape sparse_shape = ShapeUtil::MakeShapeWithSparseLayout(F32, {10}, 2); - HloInstruction* param0 = builder.AddInstruction( - HloInstruction::CreateParameter(0, sparse_shape, "param0")); - HloInstruction* param1 = builder.AddInstruction( - HloInstruction::CreateParameter(1, sparse_shape, "param1")); - builder.AddInstruction(HloInstruction::CreateBinary( - sparse_shape, HloOpcode::kAdd, param0, param1)); - // Since verifier is reporting sparse layouts as errors, we should - // use a regular HloModule instead of VerifiedHloModule to avoid - // verifier errors being triggered in the destructor. - auto module = CreateNewUnverifiedModule(); - module->AddEntryComputation(builder.Build()); - - Status status = checker().Run(module.get()).status(); - ASSERT_EQ(status.code(), tensorflow::error::UNIMPLEMENTED); - EXPECT_THAT(status.error_message(), - HasSubstr("CPU backend does not support")); - EXPECT_THAT(status.error_message(), - HasSubstr(ShapeUtil::HumanStringWithLayout(sparse_shape))); -} - -} // namespace -} // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index 394d1fc979d..24718e16e22 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -37,6 +37,7 @@ limitations under the License. 
#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/LLVMContext.h" #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/map_util.h" diff --git a/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.cc b/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.cc index 70a6d0af02c..7831c1b1b5b 100644 --- a/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.cc +++ b/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.cc @@ -70,11 +70,11 @@ TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSort( index % sort_dimension_offset + (index - index % sort_dimension_offset) * sort_dimension_elements; auto compare_function = [&](int64 a, int64 b) -> bool { - int64 memory_index_lhs = (base_offset + a * sort_dimension_offset) * - values_primitive_type_size_in_bytes[0]; - int64 memory_index_rhs = (base_offset + b * sort_dimension_offset) * - values_primitive_type_size_in_bytes[0]; for (int32 i = 0; i < values_count; ++i) { + int64 memory_index_lhs = (base_offset + a * sort_dimension_offset) * + values_primitive_type_size_in_bytes[i]; + int64 memory_index_rhs = (base_offset + b * sort_dimension_offset) * + values_primitive_type_size_in_bytes[i]; comparison_values[i * 2] = values[i] + memory_index_lhs; comparison_values[i * 2 + 1] = values[i] + memory_index_rhs; } diff --git a/tensorflow/compiler/xla/service/cpu/tests/BUILD b/tensorflow/compiler/xla/service/cpu/tests/BUILD index 51a12aee22f..f52de3394fe 100644 --- a/tensorflow/compiler/xla/service/cpu/tests/BUILD +++ b/tensorflow/compiler/xla/service/cpu/tests/BUILD @@ -95,7 +95,7 @@ tf_cc_test( "//tensorflow/core:test", "//tensorflow/core:test_main", "@com_google_absl//absl/memory", - "@llvm//:core", + "@llvm-project//llvm:core", ], ) @@ -110,9 +110,9 @@ tf_cc_test( "//tensorflow/core:test", "//tensorflow/core:test_main", "@com_google_absl//absl/strings", - "@llvm//:arm_code_gen", # fixdeps: keep - "@llvm//:target", - "@llvm//:x86_code_gen", # fixdeps: keep + "@llvm-project//llvm:arm_code_gen", # fixdeps: keep + "@llvm-project//llvm:target", + "@llvm-project//llvm:x86_code_gen", # fixdeps: keep ], ) @@ -142,9 +142,9 @@ tf_cc_test( "//tensorflow/core:test", "//tensorflow/core:test_main", "@com_google_absl//absl/strings", - "@llvm//:arm_code_gen", # fixdeps: keep - "@llvm//:target", - "@llvm//:x86_code_gen", # fixdeps: keep + "@llvm-project//llvm:arm_code_gen", # fixdeps: keep + "@llvm-project//llvm:target", + "@llvm-project//llvm:x86_code_gen", # fixdeps: keep ], ) @@ -246,8 +246,8 @@ tf_cc_test( "//tensorflow/core:test", "//tensorflow/core:test_main", "@com_google_absl//absl/strings", - "@llvm//:arm_code_gen", # fixdeps: keep - "@llvm//:target", - "@llvm//:x86_code_gen", # fixdeps: keep + "@llvm-project//llvm:arm_code_gen", # fixdeps: keep + "@llvm-project//llvm:target", + "@llvm-project//llvm:x86_code_gen", # fixdeps: keep ], ) diff --git a/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc b/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc index 333626ef3b9..266e5be0d66 100644 --- a/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc +++ b/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc @@ -189,7 +189,7 @@ Status DynamicDimensionInferenceVisitor::HandleCustomCall(HloInstruction* hlo) { // dimensions. 
ShapeIndex data_output = {0}; parent_->SetDynamicSize(hlo, data_output, i, dynamic_size, - {.stride = 1, .multiple_of = 1}); + DimensionConstraint(1, 1)); } } return Status::OK(); @@ -215,11 +215,6 @@ Status DynamicDimensionInferenceVisitor::HandleSort(HloInstruction* hlo) { int64 dynamic_dimension, int64 operand_index, HloInstruction* dynamic_size, DimensionConstraint constraint) { HloSortInstruction* sort = Cast(hlo); - int64 sort_dimension = sort->sort_dimension(); - if (sort_dimension == dynamic_dimension) { - return Unimplemented( - "Dynamic dimension on sorting dimension is not supported"); - } if (sort->values_count() == 0) { parent_->SetDynamicSize(hlo, {}, dynamic_dimension, dynamic_size, constraint); @@ -466,7 +461,7 @@ Status DynamicDimensionInferenceVisitor::HandleConcatenate( dim_size_total, dynamic_dim)); } parent_->SetDynamicSize(hlo, {}, hlo->concatenate_dimension(), - dim_size_total, {.stride = 1, .multiple_of = 1}); + dim_size_total, DimensionConstraint(1, 1)); } // Simply pass through non-concat dynamic dimensions. @@ -521,7 +516,7 @@ Status DynamicDimensionInferenceVisitor::HandleSetDimensionSize( // Propagate dynamic dimension indicated by this set dimension size // instruction. parent_->SetDynamicSize(hlo, {}, hlo->dimension(), hlo->mutable_operand(1), - {.stride = 1, .multiple_of = 1}); + DimensionConstraint(1, 1)); } // Also Propagate dynamic dimension already set by operands. @@ -865,7 +860,7 @@ Status DynamicDimensionInferenceVisitor::HandleReshape(HloInstruction* hlo) { parent_->SetDynamicSize( reshape, {}, output_dynamic_dimension, new_dynamic_size, - {.stride = 1, .multiple_of = constraint.multiple_of / divisor}); + DimensionConstraint(1, constraint.multiple_of / divisor)); } if (input_dim_size < output_dim_size) { @@ -902,12 +897,12 @@ Status DynamicDimensionInferenceVisitor::HandleReshape(HloInstruction* hlo) { hlo->parent()->AddInstruction(HloInstruction::CreateBinary( output_dynamic_size->shape(), HloOpcode::kMultiply, new_dynamic_size, operand_dynamic_size)); + int64 new_multiple_of_constraint = + constraint.multiple_of * output_dim_size / + operand->shape().dimensions(input_dynamic_dimension); parent_->SetDynamicSize( reshape, {}, output_dynamic_dimension, new_dynamic_size, - {.stride = 1, - .multiple_of = - constraint.multiple_of * output_dim_size / - operand->shape().dimensions(input_dynamic_dimension)}); + DimensionConstraint(1, new_multiple_of_constraint)); } return Status::OK(); @@ -1279,7 +1274,7 @@ Status DynamicDimensionInferenceVisitor::HandleParameter(HloInstruction* hlo) { parent_->SetDynamicSize(target_parameter, dynamic_dimension.parameter_index, dynamic_dimension.dimension, dynamic_size, - {.stride = 1, .multiple_of = 1}); + DimensionConstraint(1, 1)); return Status::OK(); }); } diff --git a/tensorflow/compiler/xla/service/dynamic_dimension_inference.h b/tensorflow/compiler/xla/service/dynamic_dimension_inference.h index 21808385ec2..070127796d6 100644 --- a/tensorflow/compiler/xla/service/dynamic_dimension_inference.h +++ b/tensorflow/compiler/xla/service/dynamic_dimension_inference.h @@ -149,6 +149,9 @@ class DynamicDimensionInference { // // struct DimensionConstraint { + explicit DimensionConstraint(int64 s, int64 m) + : stride(s), multiple_of(m) {} + DimensionConstraint() : stride(1), multiple_of(1) {} // Stride represents the distance of a newly placed element and the previous // placed element on this dynamic dimension. 
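// Illustration (not added by this change): a dynamic dimension whose elements
// sit at offsets 0, 2, 4, ... has stride 2; stride 1 means the elements are
// contiguous. multiple_of records a divisibility guarantee on the runtime
// size, as used by the reshape handling above, e.g.
//
//   DimensionConstraint(/*stride=*/1, /*multiple_of=*/4)
//
// asserts the dynamic size is a multiple of 4.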
int64 stride; diff --git a/tensorflow/compiler/xla/service/dynamic_padder.cc b/tensorflow/compiler/xla/service/dynamic_padder.cc index f41a965825d..e09138f3e11 100644 --- a/tensorflow/compiler/xla/service/dynamic_padder.cc +++ b/tensorflow/compiler/xla/service/dynamic_padder.cc @@ -21,16 +21,21 @@ limitations under the License. #include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" #include "absl/strings/str_format.h" +#include "tensorflow/compiler/xla/client/xla_builder.h" +#include "tensorflow/compiler/xla/comparison_util.h" #include "tensorflow/compiler/xla/literal.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h" #include "tensorflow/compiler/xla/service/dynamic_dimension_inference.h" +#include "tensorflow/compiler/xla/service/hlo_casting_utils.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_dce.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_instructions.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/util.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/errors.h" namespace xla { @@ -569,6 +574,7 @@ Status RewriteDynamicReshapeSingleDim( } return Status::OK(); } + StatusOr RewriteDynamicConcat( HloInstruction* concat, DynamicDimensionInference* dynamic_dimension_inference) { @@ -618,6 +624,100 @@ StatusOr RewriteDynamicConcat( concat, rewritten_concat, {})); return true; } + +StatusOr RewriteDynamicSort( + HloInstruction* hlo, + DynamicDimensionInference* dynamic_dimension_inference) { + HloInstruction* dynamic_size = nullptr; + HloSortInstruction* sort = Cast(hlo); + HloComputation* comp = hlo->parent(); + int64 sort_dim = sort->sort_dimension(); + // Find the dynamic dimension in the operand. + for (auto* operand : sort->operands()) { + if (dynamic_size == nullptr) { + dynamic_size = + dynamic_dimension_inference->GetDynamicSize(operand, {}, sort_dim); + } + } + + if (dynamic_size == nullptr) { + // Not a dynamic sort, ignore. 
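+ // (Only a dynamic size on the sort dimension needs rewriting here; a
+ // dynamic size on any other dimension leaves each sorted row intact and is
+ // handled by the generic padding logic.)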
+ return false; + } + + Shape operand_shape = + ShapeUtil::ChangeElementType(sort->operand(0)->shape(), S32); + HloInstruction* iota = + comp->AddInstruction(HloInstruction::CreateIota(operand_shape, sort_dim)); + HloInstruction* dynamic_size_broadcasted = comp->AddInstruction( + HloInstruction::CreateBroadcast(operand_shape, dynamic_size, {})); + HloInstruction* lt = comp->AddInstruction(HloInstruction::CreateCompare( + ShapeUtil::ChangeElementType(operand_shape, PRED), iota, + dynamic_size_broadcasted, ComparisonDirection::kLt)); + sort->AppendOperand(lt); + + const int64 param_number_before_rewritten = + sort->called_computations()[0]->num_parameters(); + auto new_param_0 = HloInstruction::CreateParameter( + param_number_before_rewritten, ShapeUtil::MakeScalarShape(PRED), + "inbound_lhs"); + auto new_param_1 = HloInstruction::CreateParameter( + param_number_before_rewritten + 1, ShapeUtil::MakeScalarShape(PRED), + "inbound_rhs"); + std::vector<HloInstruction*> extra_parameters{new_param_0.get(), + new_param_1.get()}; + HloComputation* sort_comp = sort->parent()->parent()->AddEmbeddedComputation( + sort->called_computations()[0]->CloneWithReplacements( + /*replacements=*/absl::flat_hash_map< + const HloInstruction*, std::unique_ptr<HloInstruction>>(), + extra_parameters)); + auto inbound_lhs = + sort_comp->parameter_instruction(param_number_before_rewritten); + auto inbound_rhs = + sort_comp->parameter_instruction(param_number_before_rewritten + 1); + sort->ReplaceCalledComputations( + [&](HloComputation* comp) { return sort_comp; }); + + // inbound_lhs & (old_comparator | !inbound_rhs) + // Select the lhs if it is in bounds and either the rhs is out of bounds or + // the original comparator returns true. + auto out_of_bound_rhs = sort_comp->AddInstruction(HloInstruction::CreateUnary( + ShapeUtil::MakeScalarShape(PRED), HloOpcode::kNot, inbound_rhs)); + auto sort_comp_or_out_of_bound_rhs = + sort_comp->AddInstruction(HloInstruction::CreateBinary( + ShapeUtil::MakeScalarShape(PRED), HloOpcode::kOr, + sort_comp->root_instruction(), out_of_bound_rhs)); + + auto new_root = sort_comp->AddInstruction(HloInstruction::CreateBinary( + ShapeUtil::MakeScalarShape(PRED), HloOpcode::kAnd, inbound_lhs, + sort_comp_or_out_of_bound_rhs)); + sort_comp->set_root_instruction(new_root); + Shape compare_shape = + ShapeUtil::ChangeElementType(sort->operand(0)->shape(), PRED); + if (sort->shape().IsTuple()) { + // For a sort that already returns a tuple, simply add another result to + // the tuple.
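+ // Worked illustration (matching the DynamicSort test below): operand
+ // s32[4] {1, 4, 3, 2} with dynamic size 3 and a descending comparator.
+ // The appended iota/compare operand marks {true, true, true, false}, and
+ // the rewritten comparator
+ //   inbound_lhs & (old_comparator | !inbound_rhs)
+ // orders every out-of-bounds element after all in-bounds elements, giving
+ // {4, 3, 1, 2}: the in-bounds prefix {1, 4, 3} sorted descending, then the
+ // padding element 2.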
+ *sort->mutable_shape()->add_tuple_shapes() = + ShapeUtil::ChangeElementType(operand_shape, PRED); + } else { + auto sort_users = sort->users(); + auto sort_clone = comp->AddInstruction(sort->Clone()); + *sort_clone->mutable_shape() = ShapeUtil::MakeTupleShape( + {sort->shape(), ShapeUtil::ChangeElementType(operand_shape, PRED)}); + auto rewritten_sort = comp->AddInstruction( + HloInstruction::CreateGetTupleElement(sort->shape(), sort_clone, 0)); + for (HloInstruction* user : sort_users) { + TF_RETURN_IF_ERROR(sort->ReplaceUseWith(user, rewritten_sort)); + } + TF_RETURN_IF_ERROR(dynamic_dimension_inference->ForwardDynamicSize( + sort, rewritten_sort, {})); + if (comp->root_instruction() == sort) { + comp->set_root_instruction(rewritten_sort); + } + } + + return true; +} + StatusOr RewriteDynamicReshape( HloInstruction* reshape, DynamicDimensionInference* dynamic_dimension_inference) { @@ -920,12 +1020,17 @@ StatusOr DynamicPadder::Run(HloModule* module) { DynamicDimensionInference::Run(module)); for (HloComputation* computation : module->computations()) { - for (HloInstruction* inst : computation->instructions()) { + for (HloInstruction* inst : computation->MakeInstructionPostOrder()) { if (inst->opcode() == HloOpcode::kConcatenate) { TF_ASSIGN_OR_RETURN( changed, RewriteDynamicConcat(inst, &dynamic_dimension_inference)); continue; } + if (inst->opcode() == HloOpcode::kSort) { + TF_ASSIGN_OR_RETURN( + changed, RewriteDynamicSort(inst, &dynamic_dimension_inference)); + continue; + } for (int64 operand_num = 0; operand_num < inst->operand_count(); ++operand_num) { HloInstruction* original_operand = inst->mutable_operand(operand_num); diff --git a/tensorflow/compiler/xla/service/dynamic_padder_test.cc b/tensorflow/compiler/xla/service/dynamic_padder_test.cc index 0e60e420d47..57e4a4e9af3 100644 --- a/tensorflow/compiler/xla/service/dynamic_padder_test.cc +++ b/tensorflow/compiler/xla/service/dynamic_padder_test.cc @@ -827,5 +827,84 @@ ENTRY main { EXPECT_EQ(result, expected); } +XLA_TEST_F(ExecutionTest, DynamicSort) { + const string hlo_text = R"( +HloModule TEST + +update_s32 (lhs: s32[], rhs: s32[]) -> s32[] { + lhs = s32[] parameter(0) + rhs = s32[] parameter(1) + ROOT add = s32[] add(lhs, rhs) +} + +%compare-greater-than (lhs: s32[], rhs: s32[]) -> pred[] { + %lhs = s32[] parameter(0) + %rhs = s32[] parameter(1) + ROOT %compare = pred[] compare(s32[] %lhs, s32[] %rhs), direction=GT +} + +ENTRY main { + param = s32[4] parameter(0) + size = s32[] constant(3) + param_dynamic_size = s32[4] set-dimension-size(param, size), + dimensions={0} + sort = s32[4]{0} sort(s32[4]{0} %param_dynamic_size), + dimensions={0}, is_stable=false, to_apply=%compare-greater-than + full_size = s32[] constant(4) + ROOT result = s32[4] set-dimension-size(sort, full_size), dimensions={0} +} +)"; + + Literal operand = LiteralUtil::CreateR1({1, 4, 3, 2}); + auto module = GetHloModule(hlo_text); + + Literal result = PadAndExecute(std::move(module), {&operand}); + Literal expected = LiteralUtil::CreateR1({4, 3, 1, 2}); + + EXPECT_EQ(result, expected); +} + +XLA_TEST_F(ExecutionTest, DynamicTupleSort) { + const string hlo_text = R"( +HloModule TEST + +%compare-greater-than (lhs: s32[], rhs: s32[], lhs_2: s32[], lhs_2: s32[]) -> pred[] { + %lhs = s32[] parameter(0) + %rhs = s32[] parameter(1) + %lhs_2 = s32[] parameter(2) + %rhs_2 = s32[] parameter(3) + ROOT %compare = pred[] compare(s32[] %lhs, s32[] %rhs), direction=GT +} + +update_s32 (lhs: s32[], rhs: s32[]) -> s32[] { + lhs = s32[] parameter(0) + rhs = s32[] 
parameter(1) + ROOT add = s32[] add(lhs, rhs) +} + +ENTRY main { + param = s32[3] parameter(0) + size = s32[] constant(2) + param_dynamic_size = s32[3] set-dimension-size(param, size), + dimensions={0} + sort = (s32[3]{0}, s32[3]{0}) sort(s32[3]{0} %param_dynamic_size, + s32[3]{0} %param_dynamic_size), + dimensions={0}, is_stable=true, to_apply=%compare-greater-than + get-tuple-element = s32[3]{0} get-tuple-element((s32[3]{0}, s32[3]{0}) %sort), + index=0 + full_size = s32[] constant(3) + ROOT result = s32[3] set-dimension-size(get-tuple-element, full_size), dimensions={0} +} +)"; + + Literal operand = LiteralUtil::CreateR1({0, 4, 2}); + auto module = GetHloModule(hlo_text); + + Literal result = PadAndExecute(std::move(module), {&operand}); + Literal expected = LiteralUtil::CreateR1({4, 0, 2}); + + EXPECT_EQ(result, expected); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/g3doc/hlo_parser.md b/tensorflow/compiler/xla/service/g3doc/hlo_parser.md index f0f3dd7785c..5c3b1540600 100644 --- a/tensorflow/compiler/xla/service/g3doc/hlo_parser.md +++ b/tensorflow/compiler/xla/service/g3doc/hlo_parser.md @@ -116,29 +116,6 @@ non_tuple | rank2345 ; rank2345 - : shape sparse_or_nested_array + : nested_array ; -sparse_or_nested_array - : sparse_array - | nested_array - ; -sparse_array - : '{' sparse_array1 '}' - ; -sparse_array1 - : sparse_array_item - | sparse_array1 ',' sparse_array_item - ; -sparse_array_item - : multi_index ':' scalar - ; -multi_index - : kInt - | '[' multi_index1 ']' - ; -multi_index1 - : kInt - | multi_index1 ',' kInt - ; - ``` diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 13e8a3f4409..fb085a237f1 100755 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -184,7 +184,7 @@ cc_library( "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:span", - "@llvm//:core", + "@llvm-project//llvm:core", ], ) @@ -198,8 +198,8 @@ cc_library( "//tensorflow/core:lib", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:span", - "@llvm//:core", - "@llvm//:support", + "@llvm-project//llvm:core", + "@llvm-project//llvm:support", ], ) @@ -287,8 +287,8 @@ cc_library( "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", "@com_google_absl//absl/types:span", - "@llvm//:core", - "@llvm//:support", + "@llvm-project//llvm:core", + "@llvm-project//llvm:support", ], ) @@ -306,7 +306,7 @@ cc_library( "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", "//tensorflow/compiler/xla/service/llvm_ir:loop_emitter", "//tensorflow/core:lib", - "@llvm//:core", + "@llvm-project//llvm:core", ], ) @@ -335,8 +335,8 @@ cc_library( "//tensorflow/core:lib", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:span", - "@llvm//:core", - "@llvm//:support", + "@llvm-project//llvm:core", + "@llvm-project//llvm:support", ], ) @@ -594,7 +594,7 @@ cc_library( "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", "//tensorflow/core:lib", "@com_google_absl//absl/algorithm:container", - "@llvm//:core", + "@llvm-project//llvm:core", ], ) @@ -1068,7 +1068,7 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:stream_executor_no_cuda", "@com_google_absl//absl/memory", - "@llvm//:core", + "@llvm-project//llvm:core", ], alwayslink = True, # Contains per-platform transfer manager registration ) @@ -1093,7 +1093,6 @@ cc_library( ":gpu_copy_insertion", ":gpu_executable", 
":gpu_hlo_schedule", - ":gpu_hlo_support_checker", ":gpu_layout_assignment", ":gpu_sanitize_constant_names", ":gpu_scatter_expander", @@ -1116,6 +1115,7 @@ cc_library( "//tensorflow/compiler/xla/service:buffer_assignment", "//tensorflow/compiler/xla/service:call_inliner", "//tensorflow/compiler/xla/service:conditional_simplifier", + "//tensorflow/compiler/xla/service:convolution_group_converter", "//tensorflow/compiler/xla/service:depthwise_convolution_converter", "//tensorflow/compiler/xla/service:dot_decomposer", "//tensorflow/compiler/xla/service:dump", @@ -1161,7 +1161,7 @@ cc_library( "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", "@com_google_absl//absl/types:span", - "@llvm//:core", + "@llvm-project//llvm:core", ], ) @@ -1196,6 +1196,7 @@ cc_library( ":gpu_conv_padding_legalization", ":gpu_conv_rewriter", ":gpu_layout_assignment", + ":ir_emission_utils", ":reduction_degenerate_dim_remover", ":reduction_dimension_grouper", ":reduction_layout_normalizer", @@ -1414,18 +1415,6 @@ tf_cc_test( ], ) -cc_library( - name = "gpu_hlo_support_checker", - srcs = ["gpu_hlo_support_checker.cc"], - hdrs = ["gpu_hlo_support_checker.h"], - deps = [ - "//tensorflow/compiler/xla:shape_util", - "//tensorflow/compiler/xla:xla_data_proto_cc", - "//tensorflow/compiler/xla/service:hlo_pass", - "//tensorflow/core:lib", - ], -) - cc_library( name = "stream_executor_util", srcs = ["stream_executor_util.cc"], @@ -1453,20 +1442,6 @@ cc_library( ], ) -tf_cc_test( - name = "gpu_hlo_support_checker_test", - srcs = ["gpu_hlo_support_checker_test.cc"], - deps = [ - ":gpu_hlo_support_checker", - "//tensorflow/compiler/xla:shape_util", - "//tensorflow/compiler/xla:test", - "//tensorflow/compiler/xla/tests:hlo_test_base", - "//tensorflow/compiler/xla/tests:xla_internal_test_main", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - ], -) - cc_library( name = "buffer_comparator", srcs = ["buffer_comparator.cc"], @@ -1604,6 +1579,7 @@ tf_cc_test( "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/service:pattern_matcher", "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", ], ) diff --git a/tensorflow/compiler/xla/service/gpu/buffer_comparator.cc b/tensorflow/compiler/xla/service/gpu/buffer_comparator.cc index 37095adf7c6..4ecf6ed8007 100644 --- a/tensorflow/compiler/xla/service/gpu/buffer_comparator.cc +++ b/tensorflow/compiler/xla/service/gpu/buffer_comparator.cc @@ -577,10 +577,24 @@ static StatusOr DeviceCompare(se::Stream* stream, se::DeviceMemory rhs_typed(rhs); uint64 buffer_size = lhs_typed.ElementCount(); - TF_ASSIGN_OR_RETURN(absl::Span compiled_ptx, - se::CompileGpuAsmOrGetCached(executor->device_ordinal(), - buffer_compare_ptx, - PtxOptsFromConfig(config))); + absl::Span compiled_ptx = {}; + StatusOr> compiled_ptx_or = + se::CompileGpuAsmOrGetCached(executor->device_ordinal(), + buffer_compare_ptx, + PtxOptsFromConfig(config)); + if (compiled_ptx_or.ok()) { + compiled_ptx = compiled_ptx_or.ConsumeValueOrDie(); + } else { + static std::once_flag ptxas_not_found_logged; + std::call_once(ptxas_not_found_logged, [&]() { + LOG(WARNING) + << compiled_ptx_or.status().ToString() + << "\nRelying on driver to perform ptx compilation. 
" + << "\nSetting XLA_FLAGS=--xla_gpu_cuda_data_dir=/path/to/cuda " + << " or modifying $PATH can be used to set the location of ptxas" + << "\nThis message will only be logged once."; + }); + } TF_ASSIGN_OR_RETURN( std::unique_ptr> comparison_kernel, diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index 30b204e6fd5..04761123127 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -36,6 +36,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/buffer_assignment.h" #include "tensorflow/compiler/xla/service/call_inliner.h" #include "tensorflow/compiler/xla/service/conditional_simplifier.h" +#include "tensorflow/compiler/xla/service/convolution_group_converter.h" #include "tensorflow/compiler/xla/service/depthwise_convolution_converter.h" #include "tensorflow/compiler/xla/service/dot_decomposer.h" #include "tensorflow/compiler/xla/service/dump.h" @@ -48,7 +49,6 @@ limitations under the License. #include "tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.h" #include "tensorflow/compiler/xla/service/gpu/gpu_executable.h" #include "tensorflow/compiler/xla/service/gpu/gpu_hlo_schedule.h" -#include "tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h" #include "tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h" #include "tensorflow/compiler/xla/service/gpu/gpu_sanitize_constant_names.h" #include "tensorflow/compiler/xla/service/gpu/gpu_scatter_expander.h" @@ -134,15 +134,31 @@ Status GpuCompiler::OptimizeHloModule( pipeline.AddPass(); pipeline.AddPass(); - pipeline.AddPass(); // TODO(b/64094172): make Call work on GPU instead of inlining. pipeline.AddPass(); + + pipeline.AddPass(); + + // We use the ConvolutionGroupConverter to convert backprops of filter + // grouped convolutions into non-grouped equivalents. + auto batch_group_cost_model = [](HloInstruction* conv) { + auto dim_numbers = conv->convolution_dimension_numbers(); + const int64 input_batch_size = conv->operand(0)->shape().dimensions( + dim_numbers.input_batch_dimension()); + return conv->batch_group_count() != input_batch_size; + }; + + pipeline.AddPass( + batch_group_cost_model, + /*convert_batch_groups_only=*/true, + /*canonicalize_depthwise_filter=*/false); + auto cost_model = [](HloInstruction* conv) { // We need a cost model for GPUs. Currently, do nothing. return false; }; - pipeline.AddPass(); + pipeline.AddPass(cost_model); // Expand the sort op to support stable sorting if required. pipeline.AddPass(); diff --git a/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.cc b/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.cc deleted file mode 100644 index 4765f67c4b1..00000000000 --- a/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.cc +++ /dev/null @@ -1,46 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h" - -#include "tensorflow/compiler/xla/layout_util.h" -#include "tensorflow/compiler/xla/shape_util.h" -#include "tensorflow/core/lib/core/errors.h" - -namespace xla { - -StatusOr GpuHloSupportChecker::Run(HloModule* module) { - for (auto* computation : module->computations()) { - for (const auto& instruction : computation->instructions()) { - TF_RETURN_IF_ERROR( - ShapeUtil::ValidateShapeWithOptionalLayout(instruction->shape())); - TF_RETURN_IF_ERROR(ShapeUtil::ForEachSubshapeWithStatus( - instruction->shape(), - [&instruction](const Shape& subshape, const ShapeIndex&) { - if (LayoutUtil::IsSparseArray(subshape)) { - return xla::Unimplemented( - "GPU backend does not support HLO instruction %s with shape " - "containing a sparse layout: %s", - instruction->ToString(), - ShapeUtil::HumanStringWithLayout(instruction->shape())); - } - return Status::OK(); - })); - } - } - return false; -} - -} // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h b/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h deleted file mode 100644 index 8b19769a781..00000000000 --- a/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h +++ /dev/null @@ -1,40 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_HLO_SUPPORT_CHECKER_H_ -#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_HLO_SUPPORT_CHECKER_H_ - -#include "tensorflow/compiler/xla/service/hlo_pass_interface.h" - -namespace xla { - -// This pass should run early in the HLO pipeline and checks for HLO constructs -// which are not supported by the GPU backend and cannot be removed via HLO -// transformations (eg, sparse layouts). -class GpuHloSupportChecker : public HloModulePass { - public: - GpuHloSupportChecker() = default; - ~GpuHloSupportChecker() override = default; - - absl::string_view name() const override { return "gpu_hlo_support_checker"; } - - // Note: always returns false (no instructions are ever modified by this - // pass). - StatusOr Run(HloModule* module) override; -}; - -} // namespace xla - -#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_HLO_SUPPORT_CHECKER_H_ diff --git a/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker_test.cc b/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker_test.cc deleted file mode 100644 index 0bd43ec9b23..00000000000 --- a/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker_test.cc +++ /dev/null @@ -1,76 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h" - -#include "tensorflow/compiler/xla/shape_util.h" -#include "tensorflow/compiler/xla/test.h" -#include "tensorflow/compiler/xla/tests/hlo_test_base.h" -#include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/protobuf/error_codes.pb.h" - -namespace xla { -namespace { - -using ::testing::HasSubstr; - -class GpuHloSupportCheckerTest : public HloTestBase { - protected: - GpuHloSupportChecker& checker() { return checker_; } - - private: - GpuHloSupportChecker checker_; -}; - -TEST_F(GpuHloSupportCheckerTest, Add) { - HloComputation::Builder builder(TestName()); - const Shape scalar_shape = ShapeUtil::MakeShape(F32, {}); - HloInstruction* param0 = builder.AddInstruction( - HloInstruction::CreateParameter(0, scalar_shape, "param0")); - HloInstruction* param1 = builder.AddInstruction( - HloInstruction::CreateParameter(1, scalar_shape, "param1")); - builder.AddInstruction(HloInstruction::CreateBinary( - scalar_shape, HloOpcode::kAdd, param0, param1)); - auto module = CreateNewVerifiedModule(); - module->AddEntryComputation(builder.Build()); - - TF_ASSERT_OK(checker().Run(module.get()).status()); -} - -TEST_F(GpuHloSupportCheckerTest, SparseUnimplemented) { - HloComputation::Builder builder(TestName()); - const Shape sparse_shape = ShapeUtil::MakeShapeWithSparseLayout(F32, {10}, 2); - HloInstruction* param0 = builder.AddInstruction( - HloInstruction::CreateParameter(0, sparse_shape, "param0")); - HloInstruction* param1 = builder.AddInstruction( - HloInstruction::CreateParameter(1, sparse_shape, "param1")); - builder.AddInstruction(HloInstruction::CreateBinary( - sparse_shape, HloOpcode::kAdd, param0, param1)); - // Since verifier is reporting sparse layouts as errors, we should - // use a regular HloModule instead of VerifiedHloModule to avoid - // verifier errors being triggered in the destructor. - auto module = CreateNewUnverifiedModule(); - module->AddEntryComputation(builder.Build()); - - Status status = checker().Run(module.get()).status(); - ASSERT_EQ(status.code(), tensorflow::error::UNIMPLEMENTED); - EXPECT_THAT(status.error_message(), - HasSubstr("GPU backend does not support")); - EXPECT_THAT(status.error_message(), - HasSubstr(ShapeUtil::HumanStringWithLayout(sparse_shape))); -} - -} // namespace -} // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc index b2067fe916d..2ff03354ea8 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc @@ -19,6 +19,7 @@ limitations under the License. 
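// (Header note: newer LLVM splits the target-specific intrinsics out of
// llvm/IR/Intrinsics.h into per-target headers, so references to
// llvm::Intrinsic::nvvm_* now need llvm/IR/IntrinsicsNVPTX.h; the
// IntrinsicsX86.h and IntrinsicsAMDGPU.h additions elsewhere in this change
// follow the same rule.)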
#include #include +#include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/IR/Module.h" #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/service/gpu/target_util.h" @@ -234,6 +235,31 @@ bool IsReductionFromOrToContiguousDimensions(const HloInstruction& reduce) { return reduction_dimensions.dimensions[1] >= kWarpSize; } +bool IsInputFusibleSlices(const HloInstruction& unnested_hlo, + bool verify_no_strides) { + if (!unnested_hlo.IsInputFusion()) { + return false; + } + + auto is_non_strided = [](const std::vector<int64>& strides) -> bool { + return absl::c_all_of(strides, [](int stride) { return stride == 1; }); + }; + + const HloInstruction* root = unnested_hlo.fused_expression_root(); + if (root->opcode() == HloOpcode::kSlice) { + return !verify_no_strides || is_non_strided(root->slice_strides()); + } + + if (root->opcode() != HloOpcode::kTuple) { + return false; + } + + return absl::c_all_of(root->operands(), [&](const HloInstruction* instr) { + return instr->opcode() == HloOpcode::kSlice && + (!verify_no_strides || is_non_strided(instr->slice_strides())); + }); +} + ReductionDimensions GetReductionKindAndContiguousComponents( const HloInstruction& reduce) { const Shape& input_shape = reduce.operand(0)->shape(); diff --git a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h index 2c37a63c05a..601a63ccede 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h @@ -157,6 +157,12 @@ bool ImplementedAsLibraryCall(const HloInstruction& hlo); // kept are contiguous in the input of the reduce instruction. bool IsReductionFromOrToContiguousDimensions(const HloInstruction& reduce); +// Returns whether unnested_hlo is an input fusion whose root is either a slice +// or a tuple of slices. If verify_no_strides is true, returns false unless all +// ROOT slices have no strides. +bool IsInputFusibleSlices(const HloInstruction& unnested_hlo, + bool verify_no_strides = false); + struct ReductionDimensions { // Indicates whether the reduction is a row reduction or a column reduction. bool is_row_reduction; diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index b65c5c7461d..684a513bf1e 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -301,6 +301,44 @@ llvm::Type* GetIndexTypeForKernel(const HloInstruction* hlo, int64 launch_size, return b->getInt32Ty(); } +// Gets the input shape of the ROOT slices, which will be used as the kernel +// launch dims. The slice input fusion requires the input shapes of the ROOT +// slices to be the same although the (slice) output shapes can be different. +// +// Returns that common input shape if it is consistent across the ROOT slices +// and the slices are non-strided. Otherwise, returns FailedPrecondition. +StatusOr<Shape> GetConsistentInputShapeForRootSlices( + const HloInstruction& fusion) { + if (!IsInputFusibleSlices(fusion, /*verify_no_strides=*/true)) { + return FailedPrecondition( + "Unsupported root for slice input fusion.
" + "Only non-strided slices are supported."); + } + + const HloInstruction& root = *fusion.fused_expression_root(); + if (root.opcode() == HloOpcode::kSlice) { + return root.operands()[0]->shape(); + } + + CHECK_EQ(root.opcode(), HloOpcode::kTuple); + const Shape& first_slice_operand_shape = + root.operands()[0]->operands()[0]->shape(); + for (size_t i = 1; i < root.operands().size(); ++i) { + const HloInstruction* slice = root.operands()[i]; + const Shape& operand_shape = slice->operands()[0]->shape(); + if (!ShapeUtil::EqualIgnoringElementType(first_slice_operand_shape, + operand_shape)) { + return FailedPrecondition( + "Fused slices do not have the same input shape, fused computation = " + "%s.", + root.parent()->name()); + } + } + + return first_slice_operand_shape; +} + } // namespace Status IrEmitterUnnested::DefaultAction(HloInstruction* hlo) { @@ -388,7 +426,13 @@ Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) { absl::make_unique(std::move(thunks), fusion)); return Status::OK(); } + // In the case of root tuple, it can be either reduce or slice input + // fusion. case HloOpcode::kTuple: { + if (IsInputFusibleSlices(*fusion)) { + return EmitInputFusibleNonStridedSlices(fusion); + } + CHECK_GE(root->operand_count(), 1); return EmitReductionFromOrToContiguousDimensions(fusion, root->operands()); @@ -404,6 +448,9 @@ Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) { } return EmitReductionFromOrToContiguousDimensions(fusion, {root}); } + case HloOpcode::kSlice: { + return EmitInputFusibleNonStridedSlices(fusion); + } default: LOG(FATAL) << "Bad opcode for input fusion: " << fusion->fused_expression_root()->opcode(); @@ -3060,5 +3107,121 @@ Status IrEmitterUnnested::EmitConstantGlobals() { return Status::OK(); } +// Emits code for slices based on the below structure. An if statement with +// a guarding condition is generated for each ROOT slice. +// +// Pseudo code: +// +// Compute values of slice input operands +// +// Compute guarding_cond0 +// if (guarding_cond0) { +// Write to output of slice0 +// } +// +// Compute guarding_cond1 +// if (guarding_cond1) { +// Write to output of slice1 +// } +// +void IrEmitterUnnested::EmitElementForInputFusibleSlices( + HloInstruction* unnested_hlo, const llvm_ir::IrArray::Index& index) { + VLOG(10) << "Emitting slice input fusion for " << unnested_hlo->ToString(); + + HloInstruction* slice_or_tuple = unnested_hlo->fused_expression_root(); + auto slice_instructions = [&]() -> absl::Span { + if (slice_or_tuple->opcode() == HloOpcode::kSlice) { + return absl::Span(&slice_or_tuple, 1); + } + CHECK_EQ(slice_or_tuple->opcode(), HloOpcode::kTuple); + return slice_or_tuple->operands(); + }(); + + // Emit input operand values of slices. + std::vector input_ir_values; + GpuElementalIrEmitter elem_emitter(hlo_module_config_, module_, &b_, + GetNestedComputer()); + FusedIrEmitter fused_emitter(GetGeneratorForOperandIrArrays(unnested_hlo), + &elem_emitter); + TF_CHECK_OK(unnested_hlo->fused_expression_root()->Accept(&fused_emitter)); + for (const HloInstruction* slice : slice_instructions) { + auto input_generator = fused_emitter.GetGenerator(slice->operand(0)); + input_ir_values.push_back(input_generator(index).ValueOrDie()); + } + + // Emit for slice_instructions. + KernelSupportLibrary ksl(&b_, llvm_ir::UnrollMode::kDefaultUnroll); + for (int64 i = 0; i < slice_instructions.size(); ++i) { + HloInstruction* slice = slice_instructions[i]; + + // guarding_cond := index >= start && index < limit, for each dim. 
+ std::vector<llvm::Value*> index_within_ranges; + for (size_t dim = 0; dim < slice->slice_starts().size(); ++dim) { + CHECK_EQ(slice->slice_strides(dim), 1); + auto larger_or_equal_than_start = b_.CreateICmpSGE( + index.multidim()[dim], + index.GetConstantWithIndexType(slice->slice_starts(dim))); + llvm::Value* smaller_than_limit = b_.CreateICmpSLT( + index.multidim()[dim], + index.GetConstantWithIndexType(slice->slice_limits(dim))); + llvm::Value* within_range = + b_.CreateAnd(larger_or_equal_than_start, smaller_than_limit); + index_within_ranges.push_back(within_range); + } + llvm::Value* guarding_cond = b_.CreateAnd(index_within_ranges); + + auto emit_slice_elem_func = [&] { + const std::vector<llvm::Value*>& src_multidim = index.multidim(); + std::vector<llvm::Value*> dst_multidim(src_multidim.size()); + for (size_t dim = 0; dim < src_multidim.size(); ++dim) { + dst_multidim[dim] = + Sub(src_multidim[dim], + index.GetConstantWithIndexType(slice->slice_starts(dim))); + } + ShapeIndex shape_index = (slice_or_tuple->opcode() == HloOpcode::kSlice) + ? ShapeIndex() + : ShapeIndex({i}); + llvm_ir::IrArray src_ir_array = + GetIrArray(*unnested_hlo, *unnested_hlo, shape_index); + IrArray::Index slice_dst_index(dst_multidim, slice->shape(), + index.GetType()); + llvm::Value* dst_addr = src_ir_array.EmitArrayElementAddress( + slice_dst_index, &b_, "slice.dest"); + b_.CreateStore(input_ir_values[i], dst_addr); + }; + + ksl.If(StrCat("slice", i), guarding_cond, emit_slice_elem_func); + } +} + +Status IrEmitterUnnested::EmitInputFusibleNonStridedSlices( + HloInstruction* unnested_hlo) { + constexpr int unroll_factor = 1; + std::unique_ptr<KernelThunk> kernel_thunk = BuildKernelThunk( + unnested_hlo, /*implements_whole_instruction=*/true, unroll_factor); + + TF_ASSIGN_OR_RETURN(Shape element_shape, + GetConsistentInputShapeForRootSlices(*unnested_hlo)); + LaunchDimensions launch_dimensions = CalculateLaunchDimensions( + element_shape, ir_emitter_context_->device_description(), unroll_factor); + UpdateLaunchDimensions(launch_dimensions, kernel_thunk.get(), + ir_emitter_context_->llvm_module()); + + Status emit_status = + ParallelLoopEmitter( + [&](const llvm_ir::IrArray::Index index) -> Status { + EmitElementForInputFusibleSlices(unnested_hlo, index); + return Status::OK(); + }, + element_shape, launch_dimensions, &b_) + .EmitLoop(IrName(unnested_hlo), + GetIndexTypeForKernel( + unnested_hlo, launch_dimensions.launch_bound(), &b_)); + + thunk_sequence_->emplace_back(std::move(kernel_thunk)); + + return emit_status; +} + } // namespace gpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h index fb64da6b43e..42a18e6547d 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h @@ -184,6 +184,19 @@ class IrEmitterUnnested : public IrEmitter, ReductionCodegenInfo ComputeReductionCodegenInfo( const HloInstruction* unnested_hlo, const HloInstruction* first_reduce); + // Generates code for input-fusible slices. + // + // Prerequisite: ROOT is either a slice or a tuple of slices. The input + // shapes of all ROOT slices need to be the same while their output shapes + // can be different; the input ranges of the slices may also overlap. Further + // generalization or specialization can be added as the need arises.
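+ //
+ // For example (legal): tuple(slice(A), slice(B)) where A and B are both
+ // f16[1024,512] element-wise results inside the fusion; the two slices may
+ // have different output shapes and overlapping input ranges. Illegal: the
+ // same tuple where A is f16[1024,512] but B is f16[512,512], since the
+ // common input shape defines the kernel launch dimensions (see
+ // GetConsistentInputShapeForRootSlices).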
+ Status EmitInputFusibleNonStridedSlices(HloInstruction* unnested_hlo); + + void EmitElementForInputFusibleSlices( + HloInstruction* unnested_hlo, + const llvm_ir::IrArray::Index& slice_input_index); + // Emits code for an in-place scatter, modifying `thunk`s launch dimensions in // the process. `scatter` may be fused, scatter indices are taken from // `scatter_indices_gen`, updates from`updates_gen`. The output buffer is diff --git a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/BUILD b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/BUILD index db26d36c71a..9203664e4c7 100644 --- a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/BUILD +++ b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/BUILD @@ -38,20 +38,20 @@ cc_library( "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", - "@llvm//:amdgpu_code_gen", - "@llvm//:analysis", - "@llvm//:bit_reader", - "@llvm//:bit_writer", - "@llvm//:code_gen", - "@llvm//:core", - "@llvm//:ipo", - "@llvm//:ir_reader", - "@llvm//:linker", - "@llvm//:nvptx_code_gen", # buildcleaner: keep - "@llvm//:objc_arc", # buildcleaner: keep - "@llvm//:scalar", - "@llvm//:support", - "@llvm//:target", + "@llvm-project//llvm:amdgpu_code_gen", + "@llvm-project//llvm:analysis", + "@llvm-project//llvm:bit_reader", + "@llvm-project//llvm:bit_writer", + "@llvm-project//llvm:code_gen", + "@llvm-project//llvm:core", + "@llvm-project//llvm:ipo", + "@llvm-project//llvm:ir_reader", + "@llvm-project//llvm:linker", + "@llvm-project//llvm:nvptx_code_gen", # buildcleaner: keep + "@llvm-project//llvm:objc_arc", # buildcleaner: keep + "@llvm-project//llvm:scalar", + "@llvm-project//llvm:support", + "@llvm-project//llvm:target", ], ) @@ -68,7 +68,7 @@ tf_cc_test( "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:lib", "//tensorflow/core:test", - "@llvm//:core", - "@llvm//:support", + "@llvm-project//llvm:core", + "@llvm-project//llvm:support", ], ) diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc old mode 100755 new mode 100644 index fa01d75d35a..d48c36b4b29 --- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc @@ -31,6 +31,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/gpu/gpu_conv_padding_legalization.h" #include "tensorflow/compiler/xla/service/gpu/gpu_conv_rewriter.h" #include "tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h" +#include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h" #include "tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.h" #include "tensorflow/compiler/xla/service/gpu/reduction_degenerate_dim_remover.h" #include "tensorflow/compiler/xla/service/gpu/reduction_dimension_grouper.h" @@ -134,6 +135,8 @@ Status NVPTXCompiler::OptimizeHloConvolutionCanonicalization( /*allow_mixed_precision=*/false); AlgebraicSimplifierOptions options; + options.set_cudnn_batchnorm_forward_training_metadata( + kCudnnBatchNormForwardTrainingCallTarget); pass.AddPass(options); } @@ -432,7 +435,7 @@ std::vector NVPTXCompiler::CompileGpuAsmOrGetCachedResult( "Can't find ptxas binary in ${CUDA_DIR}/bin. Will back to the " "GPU driver for PTX -> sass compilation. This is OK so long " "as you don't see a warning below about an out-of-date driver " - "version.", + "version. 
Custom ptxas location can be specified using $PATH.", hlo_module_config); } diff --git a/tensorflow/compiler/xla/service/gpu/target_util.cc b/tensorflow/compiler/xla/service/gpu/target_util.cc index 48c703183fc..49eadd8c6be 100644 --- a/tensorflow/compiler/xla/service/gpu/target_util.cc +++ b/tensorflow/compiler/xla/service/gpu/target_util.cc @@ -18,6 +18,8 @@ limitations under the License. #include "tensorflow/compiler/xla/service/gpu/target_util.h" #include "absl/strings/str_cat.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" +#include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/IR/MDBuilder.h" #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" #include "tensorflow/core/platform/logging.h" diff --git a/tensorflow/compiler/xla/service/gpu/tests/BUILD b/tensorflow/compiler/xla/service/gpu/tests/BUILD index 51a12e1f2fe..d723a1a6927 100644 --- a/tensorflow/compiler/xla/service/gpu/tests/BUILD +++ b/tensorflow/compiler/xla/service/gpu/tests/BUILD @@ -338,6 +338,21 @@ tf_cc_test( ], ) +tf_cc_test( + name = "gpu_input_fusible_slice_test", + srcs = ["gpu_input_fusible_slice_test.cc"], + tags = tf_cuda_tests_tags(), + deps = [ + ":gpu_codegen_test", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_module_config", + "//tensorflow/compiler/xla/service:hlo_parser", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + xla_test( name = "gpu_convolution_regression_test", srcs = ["gpu_convolution_regression_test.cc"], diff --git a/tensorflow/compiler/xla/service/gpu/tests/gpu_input_fusible_slice_test.cc b/tensorflow/compiler/xla/service/gpu/tests/gpu_input_fusible_slice_test.cc new file mode 100644 index 00000000000..7f345c19331 --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/tests/gpu_input_fusible_slice_test.cc @@ -0,0 +1,158 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include + +#include "tensorflow/compiler/xla/service/gpu/tests/gpu_codegen_test.h" +#include "tensorflow/compiler/xla/service/hlo_module_config.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace gpu { +namespace { + +class GpuSliceInputFusionTest : public GpuCodegenTest { + protected: + GpuSliceInputFusionTest() {} + + HloModuleConfig ConfigWithoutLayoutAssignment() { + HloModuleConfig config; + auto debug_options = HloTestBase::GetDebugOptionsForTest(); + // Disable the layout_assignment pass to use the preassigned layouts; + // otherwise, the pass throws away the layouts in the fusion computation. 
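+ // (The same effect is available outside tests via the debug flag, e.g.
+ // XLA_FLAGS=--xla_disable_hlo_passes=layout-assignment; the string is the
+ // pass's name() as used here.)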
+ debug_options.add_xla_disable_hlo_passes("layout-assignment"); + config.set_debug_options(debug_options); + return config; + } +}; + +TEST_F(GpuSliceInputFusionTest, InputFusionWithOnlyOneSlice) { + const char *const kHloString = R"( + HloModule input_fusion_with_only_one_slice + + fused_computation { + arg.1 = f16[1024,512]{1,0} parameter(0) + arg.2 = f16[1024,512]{1,0} parameter(1) + arg1.conv = f32[1024,512]{1,0} convert(arg.1) + arg2.conv = f32[1024,512]{1,0} convert(arg.2) + add.1 = f32[1024,512]{1,0} add(arg1.conv, arg2.conv) + ROOT slice.1 = f32[512,511]{1,0} slice(add.1), slice={[512:1024], [1:512]} + } + + ENTRY kernel_entry { + arg.1 = f16[1024,512]{1,0} parameter(0) + arg.2 = f16[1024,512]{1,0} parameter(1) + ROOT fusion = f32[512, 511]{1,0} fusion(arg.1, arg.2), kind=kInput, + calls=fused_computation + })"; + + auto hlo_module = + ParseAndReturnVerifiedModule(kHloString, ConfigWithoutLayoutAssignment()) + .ValueOrDie(); + CompileAndVerifyIr(std::move(hlo_module), + R"( +; CHECK-LABEL: define void @fusion +; CHECK: slice0 +; CHECK: } +)", + /*match_optimized_ir=*/false); + // Check that the kernel runs correctly. + EXPECT_TRUE(RunAndCompareNoHloPasses(kHloString, ErrorSpec{0, 0})); +} + +TEST_F(GpuSliceInputFusionTest, InputFusionWithATupleOfSlices) { + const char *const kHloString = R"( + HloModule input_fusion_with_a_tuple_of_slices + + fused_computation { + arg.1 = f16[1024,512]{1,0} parameter(0) + arg.2 = f16[1024,512]{1,0} parameter(1) + mul.1 = f16[1024,512]{1,0} multiply(arg.1, arg.2) + add.1 = f16[1024,512]{1,0} add(mul.1, arg.2) + slice.1 = f16[512,511]{1,0} slice(arg.1), slice={[512:1024], [1:512]} + slice.2 = f16[0,512]{1,0} slice(add.1), slice={[512:512], [0:512]} + slice.3 = f16[1,1]{1,0} slice(add.1), slice={[512:513], [511:512]} + ROOT tuple.1 = (f16[512,511]{1,0}, f16[0,512]{1,0}, f16[1,1]{1,0}) + tuple(slice.1, slice.2, slice.3) + } + + ENTRY kernel_entry { + arg.1 = f16[1024,512]{1,0} parameter(0) + arg.2 = f16[1024,512]{1,0} parameter(1) + ROOT fusion = (f16[512,511]{1,0}, f16[0,512]{1,0}, f16[1,1]{1,0}) + fusion(arg.1, arg.2), kind=kInput, calls=fused_computation + })"; + + auto hlo_module = + ParseAndReturnVerifiedModule(kHloString, ConfigWithoutLayoutAssignment()) + .ValueOrDie(); + CompileAndVerifyIr(std::move(hlo_module), + R"( +; CHECK-LABEL: define void @fusion +; CHECK: slice2 +; CHECK: } +)", + /*match_optimized_ir=*/false); + // Check that the kernel runs correctly. 
+ EXPECT_TRUE(RunAndCompareNoHloPasses(kHloString, ErrorSpec{0, 0})); +} + +TEST_F(GpuSliceInputFusionTest, ConcatThenSplit) { + const char *const kHloString = R"( + HloModule input_fusion_with_a_tuple_of_slices + + fused_computation { + arg.1 = f16[1024]{0} parameter(0) + arg.2 = f16[1024]{0} parameter(1) + arg.3 = f16[1023]{0} parameter(2) + arg.4 = f16[1023]{0} parameter(3) + mul.1 = f16[1024]{0} multiply(arg.1, arg.2) + add.1 = f16[1023]{0} add(arg.3, arg.4) + concat.1 = f16[2047]{0} concatenate(mul.1, add.1), dimensions={0} + slice.1 = f16[1024]{0} slice(concat.1), slice={[0:1024]} + slice.2 = f16[1023]{0} slice(concat.1), slice={[1024:2047]} + slice.3 = f16[0]{0} slice(concat.1), slice={[2047:2047]} + ROOT tuple.1 = (f16[1024]{0}, f16[1023]{0}, f16[0]{0}) + tuple(slice.1, slice.2, slice.3) + } + + ENTRY kernel_entry { + arg.1 = f16[1024]{0} parameter(0) + arg.2 = f16[1024]{0} parameter(1) + arg.3 = f16[1023]{0} parameter(2) + arg.4 = f16[1023]{0} parameter(3) + ROOT fusion = (f16[1024]{0}, f16[1023]{0}, f16[0]{0}) + fusion(arg.1, arg.2, arg.3, arg.4), kind=kInput, calls=fused_computation + })"; + + auto hlo_module = + ParseAndReturnVerifiedModule(kHloString, ConfigWithoutLayoutAssignment()) + .ValueOrDie(); + CompileAndVerifyIr(std::move(hlo_module), + R"( +; CHECK-LABEL: define void @fusion +; CHECK: slice2 +; CHECK: } +)", + /*match_optimized_ir=*/false); + // Check that the kernel runs correctly. + EXPECT_TRUE(RunAndCompareNoHloPasses(kHloString, ErrorSpec{0, 0})); +} + +} // namespace +} // namespace gpu +} // namespace xla diff --git a/tensorflow/compiler/xla/service/heap_simulator.cc b/tensorflow/compiler/xla/service/heap_simulator.cc index 65b813b2e24..962be890102 100644 --- a/tensorflow/compiler/xla/service/heap_simulator.cc +++ b/tensorflow/compiler/xla/service/heap_simulator.cc @@ -31,6 +31,12 @@ namespace xla { using absl::flat_hash_map; using absl::flat_hash_set; +bool HeapSimulator::Chunk::OverlapsWith(Chunk other_chunk) const { + CHECK_NE(size, 0); + CHECK_NE(other_chunk.size, 0); + return offset < other_chunk.chunk_end() && other_chunk.offset < chunk_end(); +} + /*static*/ StatusOr HeapSimulator::MinimumMemoryForModule( const HloSchedule& schedule, @@ -591,8 +597,7 @@ void GlobalDecreasingSizeBestFitHeap::Free(const HloValue* buffer, int64 size) { using Chunk = HeapSimulator::Chunk; -void GlobalDecreasingSizeBestFitHeap::BufferIntervalTree::Add( - int64 start, int64 end, const Chunk& chunk) { +void BufferIntervalTree::Add(int64 start, int64 end, const Chunk& chunk) { node_storage_.emplace_back( BufferIntervalTreeNode{start, end, end, chunk, nullptr, nullptr}); @@ -620,8 +625,7 @@ void GlobalDecreasingSizeBestFitHeap::BufferIntervalTree::Add( } } -std::vector -GlobalDecreasingSizeBestFitHeap::BufferIntervalTree::ChunksOverlappingInTime( +std::vector BufferIntervalTree::ChunksOverlappingInTime( int64 start, int64 end) const { std::vector result; if (node_storage_.empty()) { diff --git a/tensorflow/compiler/xla/service/heap_simulator.h b/tensorflow/compiler/xla/service/heap_simulator.h index ac047de3ec7..2bb0eda249f 100644 --- a/tensorflow/compiler/xla/service/heap_simulator.h +++ b/tensorflow/compiler/xla/service/heap_simulator.h @@ -57,6 +57,8 @@ class HeapSimulator { int64 size; int64 chunk_end() const { return offset + size; } + + bool OverlapsWith(Chunk other_chunk) const; }; // Result represents the result of the heap simulation. 
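The Chunk::OverlapsWith helper added in heap_simulator.cc above treats each chunk as a half-open address range [offset, offset + size). The sketch below is a standalone illustration of that predicate, not the XLA type itself; the simplified Chunk struct and the main() checks are ours:

```
// Standalone sketch of the overlap predicate added as Chunk::OverlapsWith.
// Chunk here is a simplified stand-in for the HeapSimulator type; offset and
// size model the half-open address range [offset, offset + size).
#include <cassert>
#include <cstdint>

struct Chunk {
  int64_t offset;
  int64_t size;
  int64_t chunk_end() const { return offset + size; }
  // Two half-open ranges overlap iff each one starts before the other ends.
  // Zero-sized chunks are rejected, mirroring the CHECK_NE in the real code,
  // because an empty range has no well-defined overlap under this test.
  bool OverlapsWith(Chunk other) const {
    assert(size != 0 && other.size != 0);
    return offset < other.chunk_end() && other.offset < chunk_end();
  }
};

int main() {
  assert((Chunk{0, 10}.OverlapsWith(Chunk{5, 10})));   // [0,10) vs [5,15)
  assert(!(Chunk{0, 10}.OverlapsWith(Chunk{10, 5})));  // adjacent, no overlap
  return 0;
}
```

Two ranges overlap exactly when each starts strictly before the other ends, which is why adjacent chunks such as [0,10) and [10,15) are correctly not flagged.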
@@ -284,6 +286,39 @@ class NoFragmentationStatsHeap : public HeapAlgorithm { int64 max_heap_size_ = 0; }; +// Node in BufferIntervalTree that stores the alloc and free times of a buffer, +// and the chunk assigned to it. +struct BufferIntervalTreeNode { + // Alloc time. + int64 start; + // Free time. + int64 end; + // Maximum free time of all nodes in the subtree where this node is the root. + int64 subtree_end; + // Allocated chunk for the buffer. + HeapSimulator::Chunk chunk; + // Left child. + BufferIntervalTreeNode* left; + // Right child. + BufferIntervalTreeNode* right; +}; + +// An interval tree that can query buffers overlapping in time. +class BufferIntervalTree { + public: + using Chunk = HeapSimulator::Chunk; + // Adds a buffer to the interval tree, with the time interval and allocated + // chunk specified. + void Add(int64 start, int64 end, const Chunk& chunk); + + // Returns vector of allocated chunks that overlap with the given time + // interval. + std::vector<Chunk> ChunksOverlappingInTime(int64 start, int64 end) const; + + private: + std::list<BufferIntervalTreeNode> node_storage_; +}; + // GlobalDecreasingSizeBestFitHeap collects the live intervals of all buffers, // then allocates them in decreasing spatial or temporal size regardless of the // alloc/free time. It internally tracks the allocated buffers and their live @@ -334,39 +369,6 @@ class GlobalDecreasingSizeBestFitHeap : public HeapAlgorithm { static BufferIntervalCompare GetSpatialBufferIntervalCompare(); protected: - // Node in BufferIntervalTree that stores the alloc and free times of a - // buffer, and the chunk assigned to it. - struct BufferIntervalTreeNode { - // Alloc time. - int64 start; - // Free time. - int64 end; - // Maximum free time of all nodes in the subtree where this node is the - // root. - int64 subtree_end; - // Allocated chunk for the buffer. - HeapSimulator::Chunk chunk; - // Left child. - BufferIntervalTreeNode* left; - // Right child. - BufferIntervalTreeNode* right; - }; - - // An interval tree that can query buffers overlapping in time. - class BufferIntervalTree { - public: - // Adds a buffer to the interval tree, with the time interval and allocated - // chunk specified. - void Add(int64 start, int64 end, const Chunk& chunk); - - // Returns vector of allocated chunks that overlap with the given time - // interval. - std::vector<Chunk> ChunksOverlappingInTime(int64 start, int64 end) const; - - private: - std::list<BufferIntervalTreeNode> node_storage_; - }; - // The candidate contains a chunk and the resultant heap size if this // chunk is to be committed.
struct ChunkCandidate { diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index f8fbaf19c5c..4322c26b2de 100755 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -2987,8 +2987,8 @@ static Status PostOrderDFS(HloInstruction* root, Visitor* visitor, visitor->GetVisitState(current_id); if (visit_state == Visitor::kVisited) { dfs_stack.pop_back(); - VLOG(3) << "Not visiting HLO %" << current_node->name() - << " as it was already visited."; + VLOG(3) << "Not visiting HLO (id = " << current_id + << ") as it was already visited."; continue; } diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 2ab606d7100..104bca8e876 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -1792,6 +1792,10 @@ class HloInstruction { // Delegates to HloCholeskyInstruction::cholesky_options(). const CholeskyOptions& cholesky_options() const; + // Appends operand to the list of operands and adds this instruction as a user + // of the operand. + void AppendOperand(HloInstruction* operand); + // Old methods kept for smooth subclassing transition END. protected: @@ -1831,10 +1835,6 @@ class HloInstruction { // by factory methods. HloInstruction(HloOpcode opcode, const Shape& shape); - // Appends operand to the list of operands and adds this instruction as a user - // of the operand. - void AppendOperand(HloInstruction* operand); - void RemoveOperandAt(int index) { operands_.erase(operands_.begin() + index); } diff --git a/tensorflow/compiler/xla/service/hlo_lexer.cc b/tensorflow/compiler/xla/service/hlo_lexer.cc index 5de3717e26c..bc1745a0791 100644 --- a/tensorflow/compiler/xla/service/hlo_lexer.cc +++ b/tensorflow/compiler/xla/service/hlo_lexer.cc @@ -280,7 +280,6 @@ TokKind HloLexer::LexIdentifier() { KEYWORD(ROOT); KEYWORD(maximal); KEYWORD(replicated); - KEYWORD(sparse); #undef KEYWORD @@ -496,8 +495,6 @@ string TokKindToString(TokKind kind) { return "kw_inf"; case TokKind::kNegInf: return "kNegInf"; - case TokKind::kw_sparse: - return "kw_sparse"; case TokKind::kPrimitiveType: return "kPrimitiveType"; case TokKind::kName: diff --git a/tensorflow/compiler/xla/service/hlo_lexer.h b/tensorflow/compiler/xla/service/hlo_lexer.h index d4a49fea200..6a59f180ad8 100644 --- a/tensorflow/compiler/xla/service/hlo_lexer.h +++ b/tensorflow/compiler/xla/service/hlo_lexer.h @@ -63,7 +63,6 @@ enum class TokKind { kw_replicated, kw_nan, kw_inf, - kw_sparse, kNegInf, // -inf diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc index b05f76a1d29..ecb25298288 100644 --- a/tensorflow/compiler/xla/service/hlo_parser.cc +++ b/tensorflow/compiler/xla/service/hlo_parser.cc @@ -72,10 +72,6 @@ HloSchedule ScheduleFromInstructionOrder(HloModule* module) { return schedule; } -// Some functions accept either a linear index or a multi-dimensional index -// (used for indexing into sparse literals). -using LinearOrMultiIndex = absl::variant>; - // Parser for the HloModule::ToString() format text. 
class HloParserImpl : public HloParser { public: @@ -137,24 +133,21 @@ class HloParserImpl : public HloParser { bool ParseTupleLiteral(Literal* literal, const Shape& shape); bool ParseNonTupleLiteral(Literal* literal, const Shape& shape); bool ParseDenseLiteral(Literal* literal, const Shape& shape); - bool ParseSparseLiteral(Literal* literal, const Shape& shape); - // Sets the sub-value of literal at the given linear or sparse index to the - // given value. If the literal is dense, it myst have the default layout. + // Sets the sub-value of literal at the given linear index to the + // given value. If the literal is dense, it must have the default layout. // // `loc` should be the source location of the value. - bool SetValueInLiteral(LocTy loc, int64 value, LinearOrMultiIndex index, + bool SetValueInLiteral(LocTy loc, int64 value, int64 index, Literal* literal); + bool SetValueInLiteral(LocTy loc, double value, int64 index, Literal* literal); - bool SetValueInLiteral(LocTy loc, double value, LinearOrMultiIndex index, + bool SetValueInLiteral(LocTy loc, bool value, int64 index, Literal* literal); + bool SetValueInLiteral(LocTy loc, std::complex value, int64 index, Literal* literal); - bool SetValueInLiteral(LocTy loc, bool value, LinearOrMultiIndex index, - Literal* literal); - bool SetValueInLiteral(LocTy loc, std::complex value, - LinearOrMultiIndex index, Literal* literal); // `loc` should be the source location of the value. template - bool SetValueInLiteralHelper(LocTy loc, ParsedElemT value, - LinearOrMultiIndex index, Literal* literal); + bool SetValueInLiteralHelper(LocTy loc, ParsedElemT value, int64 index, + Literal* literal); // Checks whether the given value is within the range of LiteralNativeT. // `loc` should be the source location of the value. 
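With sparse literals removed, the SetValueInLiteral overloads declared above take a plain int64 linear index rather than the old LinearOrMultiIndex variant, since the parser only ever writes into dense, default-layout literals. Below is a minimal sketch of the resulting bounds-check-and-store pattern; the container, function name, and error text are illustrative stand-ins, not the XLA API:

```
// Sketch of the simplified dense-literal store path: a linear element index
// is validated against the element count and then written directly.
#include <cstdint>
#include <iostream>
#include <vector>

bool SetValueAtLinearIndex(std::vector<double>& elements, int64_t index,
                           double value) {
  // Mirrors the "index is out of range" error the parser reports.
  if (index < 0 || index >= static_cast<int64_t>(elements.size())) {
    std::cerr << "tries to set value " << value << " at linear index " << index
              << ", but the index is out of range\n";
    return false;
  }
  elements[index] = value;
  return true;
}

int main() {
  std::vector<double> literal(6, 0.0);    // stands in for an f64[2,3] literal
  SetValueAtLinearIndex(literal, 4, 2.5); // ok
  SetValueAtLinearIndex(literal, 9, 1.0); // reports out-of-range
  return 0;
}
```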
@@ -2125,8 +2118,7 @@ bool HloParserImpl::ParseInstructionNames( "expects '}' at the end of instruction name list"); } -bool HloParserImpl::SetValueInLiteral(LocTy loc, int64 value, - LinearOrMultiIndex index, +bool HloParserImpl::SetValueInLiteral(LocTy loc, int64 value, int64 index, Literal* literal) { const Shape& shape = literal->shape(); switch (shape.element_type()) { @@ -2160,8 +2152,7 @@ bool HloParserImpl::SetValueInLiteral(LocTy loc, int64 value, } } -bool HloParserImpl::SetValueInLiteral(LocTy loc, double value, - LinearOrMultiIndex index, +bool HloParserImpl::SetValueInLiteral(LocTy loc, double value, int64 index, Literal* literal) { const Shape& shape = literal->shape(); switch (shape.element_type()) { @@ -2180,8 +2171,7 @@ bool HloParserImpl::SetValueInLiteral(LocTy loc, double value, } } -bool HloParserImpl::SetValueInLiteral(LocTy loc, bool value, - LinearOrMultiIndex index, +bool HloParserImpl::SetValueInLiteral(LocTy loc, bool value, int64 index, Literal* literal) { const Shape& shape = literal->shape(); switch (shape.element_type()) { @@ -2194,8 +2184,7 @@ bool HloParserImpl::SetValueInLiteral(LocTy loc, bool value, } bool HloParserImpl::SetValueInLiteral(LocTy loc, std::complex value, - LinearOrMultiIndex index, - Literal* literal) { + int64 index, Literal* literal) { const Shape& shape = literal->shape(); switch (shape.element_type()) { case C64: @@ -2221,54 +2210,21 @@ std::string StringifyValue(std::complex val) { template bool HloParserImpl::SetValueInLiteralHelper(LocTy loc, ParsedElemT value, - LinearOrMultiIndex index, - Literal* literal) { + int64 index, Literal* literal) { if (!CheckParsedValueIsInRange(loc, value)) { return false; } // Check that the index is in range and assign into the literal - if (auto* linear_index = absl::get_if(&index)) { - if (*linear_index >= ShapeUtil::ElementsIn(literal->shape())) { - return Error(loc, StrCat("trys to set value ", StringifyValue(value), - " to a literal in shape ", - ShapeUtil::HumanString(literal->shape()), - " at linear index ", *linear_index, - ", but the index is out of range")); - } - literal->data().at(*linear_index) = - static_cast(value); - } else { - auto* multi_index = absl::get_if>(&index); - CHECK(multi_index != nullptr); - - auto invalid_idx = [&](std::string msg) { - return Error(loc, StrFormat("Invalid sparse index [%s]. 
%s", - absl::StrJoin(*multi_index, ", "), msg)); - }; - - const auto& shape = literal->shape(); - if (shape.rank() != multi_index->size()) { - return invalid_idx( - StrFormat("Has rank %d, but constant has shape %s, which has rank %d", - multi_index->size(), shape.ToString(), shape.rank())); - } - for (int64 i = 0; i < shape.rank(); ++i) { - auto idx = (*multi_index)[i]; - if (idx < 0) { - return invalid_idx(StrFormat( - "Sub-index value at %d, namely %d, cannot be negative.", i, idx)); - } - if (idx >= shape.dimensions(i)) { - return invalid_idx( - StrFormat("Sub-index at %d, namely %d, doesn't fit within shape " - "dimension %d in %s", - i, idx, shape.dimensions(i), shape.ToString())); - } - } - literal->AppendSparseElement(*multi_index, - static_cast(value)); + if (index >= ShapeUtil::ElementsIn(literal->shape())) { + return Error(loc, StrCat("trys to set value ", StringifyValue(value), + " to a literal in shape ", + ShapeUtil::HumanString(literal->shape()), + " at linear index ", index, + ", but the index is out of range")); } + literal->data().at(index) = + static_cast(value); return true; } @@ -2314,12 +2270,8 @@ bool HloParserImpl::ParseTupleLiteral(Literal* literal, const Shape& shape) { // non_tuple // ::= rank01 // ::= rank2345 -// rank2345 ::= shape sparse_or_nested_array +// rank2345 ::= shape nested_array bool HloParserImpl::ParseNonTupleLiteral(Literal* literal, const Shape& shape) { - if (LayoutUtil::IsSparseArray(shape)) { - return ParseSparseLiteral(literal, shape); - } - CHECK(LayoutUtil::IsDenseArray(shape)) << shape.ToString(true); return ParseDenseLiteral(literal, shape); } @@ -2500,98 +2452,6 @@ bool HloParserImpl::ParseDenseLiteral(Literal* literal, const Shape& shape) { return true; } -bool HloParserImpl::ParseSparseLiteral(Literal* literal, const Shape& shape) { - *literal = Literal(shape); - if (!ParseToken(TokKind::kLbrace, - "expects '{' at the beginning of a sparse literal")) { - return false; - } - - for (;;) { - if (lexer_.GetKind() == TokKind::kRbrace) { - lexer_.Lex(); - break; - } - - std::vector index; - if (lexer_.GetKind() == TokKind::kInt) { - int64 single_index = lexer_.GetInt64Val(); - lexer_.Lex(); - index.push_back(single_index); - } else { - if (!ParseInt64List(TokKind::kLsquare, TokKind::kRsquare, TokKind::kComma, - &index)) { - return false; - } - } - if (!ParseToken(TokKind::kColon, - "expects ':' after after the sparse array index and before " - "the sparse array value")) { - return false; - } - - LocTy value_loc = lexer_.GetLoc(); - if (lexer_.GetKind() == TokKind::kw_true || - lexer_.GetKind() == TokKind::kw_false) { - bool value = lexer_.GetKind() == TokKind::kw_true; - if (!SetValueInLiteral(lexer_.GetLoc(), value, index, literal)) { - return false; - } - lexer_.Lex(); - } else if (primitive_util::IsIntegralType(shape.element_type())) { - int64 value; - if (!ParseInt64(&value)) { - return Error(value_loc, - StrCat("expects integer for primitive type: ", - PrimitiveType_Name(shape.element_type()))); - } - if (!SetValueInLiteral(value_loc, value, index, literal)) { - return false; - } - } else if (primitive_util::IsFloatingPointType(shape.element_type())) { - double value; - if (!ParseDouble(&value)) { - return Error(value_loc, - StrCat("expects floating point value for primitive type: ", - PrimitiveType_Name(shape.element_type()))); - } - if (!SetValueInLiteral(value_loc, value, index, literal)) { - return false; - } - } else if (primitive_util::IsComplexType(shape.element_type())) { - std::complex value; - if (!ParseComplex(&value)) { - 
return Error(value_loc, - StrCat("expects complex value for primitive type: ", - PrimitiveType_Name(shape.element_type()))); - } - if (!SetValueInLiteral(value_loc, value, index, literal)) { - return false; - } - } else { - LOG(FATAL) << "Unexpected element type: " - << PrimitiveType_Name(shape.element_type()); - } - - if (lexer_.GetKind() != TokKind::kRbrace && - !ParseToken(TokKind::kComma, - "expects ',' separator between sparse array elements")) { - return false; - } - - if (literal->sparse_element_count() + 1 == - LayoutUtil::MaxSparseElements(shape.layout())) { - return Error( - lexer_.GetLoc(), - StrCat("number of sparse elements exceeds maximum for layout: ", - ShapeUtil::HumanStringWithLayout(shape))); - } - } - - literal->SortSparseElements(); - return true; -} - // MaxFiniteValue is a type-traits helper used by // HloParserImpl::CheckParsedValueIsInRange. template <typename T> struct MinMaxFiniteValue { @@ -2615,18 +2475,37 @@ struct MinMaxFiniteValue { static double min() { return -max(); } }; +// MSVC's standard C++ library does not define isnan/isfinite for integer types. +// To work around that we will need to provide our own. +template <typename T> +std::enable_if_t<std::is_floating_point<T>::value, bool> IsFinite(T val) { + return std::isfinite(val); +} +template <typename T> +std::enable_if_t<std::is_floating_point<T>::value, bool> IsNaN(T val) { + return std::isnan(val); +} +template <typename T> +std::enable_if_t<std::is_integral<T>::value, bool> IsFinite(T val) { + return std::isfinite(static_cast<double>(val)); +} +template <typename T> +std::enable_if_t<std::is_integral<T>::value, bool> IsNaN(T val) { + return std::isnan(static_cast<double>(val)); +} + template <typename LiteralNativeT, typename ParsedElemT> bool HloParserImpl::CheckParsedValueIsInRange(LocTy loc, ParsedElemT value) { if (std::is_floating_point<ParsedElemT>::value) { auto value_as_native_t = static_cast<LiteralNativeT>(value); auto value_double_converted = static_cast<ParsedElemT>(value_as_native_t); - if (!std::isfinite(value) || std::isfinite(value_double_converted)) { + if (!IsFinite(value) || IsFinite(value_double_converted)) { value = value_double_converted; } } PrimitiveType literal_ty = primitive_util::NativeToPrimitiveType<LiteralNativeT>(); - if (std::isnan(value) || + if (IsNaN(value) || (std::numeric_limits<ParsedElemT>::has_infinity && (std::numeric_limits<ParsedElemT>::infinity() == value || -std::numeric_limits<ParsedElemT>::infinity() == value))) { @@ -3820,21 +3699,6 @@ bool HloParserImpl::ParseShape(Shape* result) { } LayoutUtil::SetToDefaultLayout(result); - if (lexer_.GetKind() == TokKind::kw_sparse) { - lexer_.Lex(); - const std::string message = - "expects a brace-bracketed integer for sparse layout"; - int64 max_sparse_elements; - if (!ParseToken(TokKind::kLbrace, message) || - !ParseInt64(&max_sparse_elements) || - !ParseToken(TokKind::kRbrace, message)) { - return false; - } - *result->mutable_layout() = - LayoutUtil::MakeSparseLayout(max_sparse_elements); - return true; - } - // We need to lookahead to see if a following open brace is the start of a // layout.
The specific problematic case is: // @@ -4386,6 +4250,7 @@ bool HloParserImpl::ParseSingleInstruction(HloModule* module) { for (auto& comp : computations_) { module->AddEmbeddedComputation(std::move(comp)); } + TF_CHECK_OK(module->set_schedule(ScheduleFromInstructionOrder(module))); return true; } diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc index d65613fc4b8..e3431a4731f 100644 --- a/tensorflow/compiler/xla/service/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc @@ -841,50 +841,6 @@ ENTRY %fusion.v3 () -> f32[3,2,1,1] { )" }, { -"Sparse", -R"(HloModule sparse_f32 - -ENTRY %sparse () -> f32[2,3,4] { - ROOT %foo = f32[2,3,4]sparse{10} constant({[0, 1, 2]: 1, [1, 2, 2]: 2, [1, 2, 3]: 3}) -} - -)", -/*enable_verification=*/false -}, -{ -"SparseC128", -R"(HloModule sparse_c128 - -ENTRY %sparse () -> c128[2,3,4] { - ROOT %foo = c128[2,3,4]sparse{10} constant({[0, 1, 2]: (1, 0), [1, 2, 2]: (2, 5), [1, 2, 3]: (3, 10)}) -} - -)", -/*enable_verification=*/false -}, -{ -"SparseEmpty", -R"(HloModule sparse_f32_empty - -ENTRY %sparse_f32_empty () -> f32[2,3,4] { - ROOT %foo = f32[2,3,4]sparse{10} constant({}) -} - -)", -/*enable_verification=*/false, -}, -{ -"SparseR1", -R"(HloModule sparse_f32_r1 - -ENTRY %sparse_f32_r1 () -> f32[9] { - ROOT %foo = f32[9]sparse{10} constant({1: 2, 3: 4, 5: 6}) -} - -)", -/*enable_verification=*/false, -}, -{ "Gather", R"(HloModule StringifyGather @@ -1982,17 +1938,6 @@ TEST_F(HloParserTest, ConstantBf16Overflow) { "out of range"); } -TEST_F(HloParserTest, ConstantF16OverflowInSparseArray) { - const string original = R"( - HloModule test_module - ENTRY test { - ROOT c = f16[5]sparse{10} constant({[0]: 0, [1]: -65520}) - })"; - ExpectHasSubstr( - ParseAndReturnUnverifiedModule(original).status().error_message(), - "is out of range for literal's primitive type F16"); -} - TEST_F(HloParserTest, ConstantUnsignedUnderflow) { const string original = R"( HloModule ConstantUnsignedUnderflow_module @@ -2852,50 +2797,6 @@ ENTRY %entrycomp (p: f32[2,2]) -> f32[2,2] { " with the shape of the operand instruction f32[2,2]{1,0}."); } -TEST_F(HloParserTest, OutOfRangeSparseIndex) { - const string original = R"( - HloModule test_module - ENTRY test { - ROOT c = f16[5]sparse{10} constant({[100]: 0}) - })"; - ExpectHasSubstr( - ParseAndReturnUnverifiedModule(original).status().error_message(), - "Invalid sparse index"); -} - -TEST_F(HloParserTest, NegativeSparseIndex) { - const string original = R"( - HloModule test_module - ENTRY test { - ROOT c = f16[5]sparse{10} constant({-1: 0}) - })"; - ExpectHasSubstr( - ParseAndReturnUnverifiedModule(original).status().error_message(), - "Invalid sparse index"); -} - -TEST_F(HloParserTest, SparseIndexWithRankTooLarge) { - const string original = R"( - HloModule test_module - ENTRY test { - ROOT c = f16[5]sparse{10} constant({[0, 0]: 0}) - })"; - ExpectHasSubstr( - ParseAndReturnUnverifiedModule(original).status().error_message(), - "Invalid sparse index"); -} - -TEST_F(HloParserTest, SparseIndexWithRankTooSmall) { - const string original = R"( - HloModule test_module - ENTRY test { - ROOT c = f16[5, 5]sparse{10} constant({[0]: 0}) - })"; - ExpectHasSubstr( - ParseAndReturnUnverifiedModule(original).status().error_message(), - "Invalid sparse index"); -} - TEST_F(HloParserTest, ParseShapeStringR2F32) { string shape_string = "f32[123,456]"; TF_ASSERT_OK_AND_ASSIGN(Shape actual, ParseShape(shape_string)); @@ -2994,15 +2895,6 @@ TEST_F(HloParserTest, 
ParseShapeStringWithTilingLayout) { "Dimensions size is 3, but minor to major size is 1."); } -TEST_F(HloParserTest, ParseShapeStringWithSparseLayout) { - string shape_string = "f32[123,456]sparse{10}"; - TF_ASSERT_OK_AND_ASSIGN(Shape actual, ParseShape(shape_string)); - Shape expected = ShapeUtil::MakeShapeWithSparseLayout(F32, {123, 456}, 10); - ASSERT_TRUE(ShapeUtil::Equal(expected, actual)) - << "expected: " << ShapeUtil::HumanString(expected) - << "actual: " << ShapeUtil::HumanString(actual); -} - TEST_F(HloParserTest, ParseShapeStringWithMemorySpaceLayout) { // Tile, element size, and memory space. string shape_string = "pred[123,456]{1,0:T(2,128)E(1)S(3)}"; @@ -3047,10 +2939,8 @@ TEST_F(HloParserTest, ParseTokenType) { } TEST_F(HloParserTest, ParseInvalidShapeString) { - string shape_strings[] = { - "f32[123,456]foobar{0,1}", "f32[123,456]sparse{0,1}", "f32[123,456]{foo}", - "f32[123,456]dense{foo}", "f32[123,456]sparse{foo}", - }; + string shape_strings[] = {"f32[123,456]foobar{0,1}", "f32[123,456]{foo}", + "f32[123,456]dense{foo}"}; for (const string& shape_string : shape_strings) { StatusOr result = ParseShape(shape_string); ASSERT_FALSE(result.ok()) << "shape: " << shape_string; diff --git a/tensorflow/compiler/xla/service/hlo_query.cc b/tensorflow/compiler/xla/service/hlo_query.cc index defd6abd8f6..1b6494bf3cb 100644 --- a/tensorflow/compiler/xla/service/hlo_query.cc +++ b/tensorflow/compiler/xla/service/hlo_query.cc @@ -133,5 +133,17 @@ bool ContainsLayoutConstrainedAllReduce(const HloModule& module) { return false; } +int64 NextChannelId(const HloModule& module) { + int64 next_channel_id = 1; + for (const HloComputation* comp : module.computations()) { + for (const HloInstruction* hlo : comp->instructions()) { + if (DynCast(hlo)) { + next_channel_id = std::max(next_channel_id, *hlo->channel_id() + 1); + } + } + } + return next_channel_id; +} + } // namespace hlo_query } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_query.h b/tensorflow/compiler/xla/service/hlo_query.h index 0ea36ae83f8..b7fbc465dcb 100644 --- a/tensorflow/compiler/xla/service/hlo_query.h +++ b/tensorflow/compiler/xla/service/hlo_query.h @@ -77,6 +77,10 @@ bool MatchBinaryInstructionOperandOpcode(HloOpcode opcode, // layout. bool ContainsLayoutConstrainedAllReduce(const HloModule& module); +// Returns the next available channel id that can be used in the given module +// (for HloChannelInstructions). +int64 NextChannelId(const HloModule& module); + } // namespace hlo_query } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc b/tensorflow/compiler/xla/service/hlo_rematerialization.cc index 445a3ea97d2..5d38bbeec32 100644 --- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc +++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc @@ -370,7 +370,8 @@ class MemoryUsageTracker { const HloRematerialization::ShapeSizeFunction& size_function, const HloRematerialization::CompactShapeFunction& compact_shape_function, const TuplePointsToAnalysis& points_to_analysis, - const InstructionList& instruction_list); + const InstructionList& instruction_list, + HloRematerialization::RematerializationMode mode); // Starts the placement of the given instruction. This adds the sizes of the // LogicalBuffers defined by the instruction to the current memory @@ -607,6 +608,7 @@ class MemoryUsageTracker { // between the calling of BeginInstruction and EndInstruction. 
Item* in_progress_item_ = nullptr; + HloRematerialization::RematerializationMode mode_; // All buffers in the computation. std::vector<Buffer> buffers_; }; @@ -616,11 +618,13 @@ MemoryUsageTracker::MemoryUsageTracker( const HloRematerialization::ShapeSizeFunction& size_function, const HloRematerialization::CompactShapeFunction& compact_shape_function, const TuplePointsToAnalysis& points_to_analysis, - const InstructionList& instruction_list) + const InstructionList& instruction_list, + HloRematerialization::RematerializationMode mode) : computation_(computation), instruction_list_(instruction_list), size_function_(size_function), - compact_shape_function_(compact_shape_function) { + compact_shape_function_(compact_shape_function), + mode_(mode) { PointsToSet::BufferSet live_out_set = points_to_analysis.GetPointsToSet(computation_->root_instruction()) .CreateFlattenedSet(); @@ -1155,7 +1159,10 @@ MemoryUsageTracker::PickRematerializationCandidate( continue; } - if (item->buffers_output.size() == 1) { + if (item->buffers_output.size() == 1 && + (mode_ == HloRematerialization::RematerializationMode::kCompressOnly || + mode_ == HloRematerialization::RematerializationMode:: kRecomputeAndCompress)) { // Only consider compressing single output instruction. const Buffer& output_buffer = buffers_.at(item->buffers_output[0]); @@ -1196,6 +1203,11 @@ MemoryUsageTracker::PickRematerializationCandidate( continue; } + // Do not consider recomputation in compress-only mode. + if (mode_ == HloRematerialization::RematerializationMode::kCompressOnly) { + continue; + } + const int64 memory_reduced = MemoryReducedIfRematerialized(item); if (memory_reduced > 0) { @@ -1370,7 +1382,7 @@ StatusOr<int64> HloRematerialization::ComputePeakMemory( InstructionList instruction_list(order); MemoryUsageTracker tracker(computation, size_function_, compact_shape_function_, *points_to_analysis_, - instruction_list); + instruction_list, mode_); int64 peak_memory = tracker.memory_usage(); for (auto* item = instruction_list.first(); item != nullptr; item = instruction_list.next(item)) { @@ -1412,9 +1424,9 @@ StatusOr<bool> HloRematerialization::RematerializeComputation( CHECK(!ContainsKey(rematerialized_computations_, computation)); InstructionList instruction_list(schedule->sequence(computation)); - MemoryUsageTracker memory_tracker(computation, size_function_, - compact_shape_function_, - *points_to_analysis_, instruction_list); + MemoryUsageTracker memory_tracker( computation, size_function_, compact_shape_function_, *points_to_analysis_, instruction_list, mode_); bool changed = false; // If the rematerialization makes the source instruction dead, then the diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.h b/tensorflow/compiler/xla/service/hlo_rematerialization.h index 9ab34b4862d..69cdc84991b 100644 --- a/tensorflow/compiler/xla/service/hlo_rematerialization.h +++ b/tensorflow/compiler/xla/service/hlo_rematerialization.h @@ -49,6 +49,13 @@ class HloRematerialization : public HloModulePass { int64 after_bytes; }; + // Mode in which the rematerialization algorithm should be run. + enum class RematerializationMode { + kRecomputeOnly, // Only consider the kRecompute RematStrategy. + kCompressOnly, // Only consider the kCompress RematStrategy. + kRecomputeAndCompress // Consider both kRecompute and kCompress.
+ }; + static Shape DefaultCompactShapeFunction(const Shape& shape) { return shape; } // Constructor parameters: @@ -69,13 +76,15 @@ class HloRematerialization : public HloModulePass { explicit HloRematerialization( const ShapeSizeFunction& size_function, int64 memory_limit_bytes, RematerializationSizes* sizes, - CompactShapeFunction compact_shape_function = nullptr) + CompactShapeFunction compact_shape_function = nullptr, + RematerializationMode mode = RematerializationMode::kRecomputeAndCompress) : size_function_(size_function), memory_limit_bytes_(memory_limit_bytes), sizes_(sizes), compact_shape_function_(compact_shape_function == nullptr ? DefaultCompactShapeFunction - : std::move(compact_shape_function)) {} + : std::move(compact_shape_function)), + mode_(mode) {} ~HloRematerialization() override = default; absl::string_view name() const override { return "rematerialization"; } @@ -152,6 +161,8 @@ class HloRematerialization : public HloModulePass { // uses of the original instruction and the original instruction is // dead. Hence, no net instructions were added. int64 net_instructions_added_ = 0; + + RematerializationMode mode_; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index 1218f7dfc6f..b2beb9dda55 100755 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -33,17 +33,6 @@ limitations under the License. namespace xla { -Status VerifyNotSparse(const Shape& shape) { - return ShapeUtil::ForEachSubshapeWithStatus( - shape, [](const Shape& subshape, const ShapeIndex&) -> Status { - if (LayoutUtil::IsSparseArray(subshape)) { - return InternalError("Sparse arrays are not yet fully supported: %s", - ShapeUtil::HumanStringWithLayout(subshape)); - } - return Status::OK(); - }); -} - bool IsCallerInstruction(HloInstruction* hlo) { switch (hlo->opcode()) { case HloOpcode::kCall: @@ -93,8 +82,6 @@ Status ShapeVerifier::Preprocess(HloInstruction* hlo) { "Called computations specified for non-caller instruction %s", hlo->ToString()); } - TF_RETURN_IF_ERROR(VerifyNotSparse(hlo->shape())); - absl::optional arity = HloOpcodeArity(hlo->opcode()); if (arity) { TF_RETURN_IF_ERROR(CheckOperandCount(hlo, *arity)); @@ -1109,8 +1096,6 @@ Status ShapeVerifier::VerifyEntryComputationLayout(const HloModule& module) { TF_RETURN_IF_ERROR( ShapeUtil::ValidateShapeWithOptionalLayout(result_layout.shape())); - TF_RETURN_IF_ERROR(VerifyNotSparse(result_layout.shape())); - if (!ShapeUtil::Compatible(computation->root_instruction()->shape(), result_layout.shape())) { return InternalError( @@ -1131,7 +1116,6 @@ Status ShapeVerifier::VerifyEntryComputationLayout(const HloModule& module) { const HloInstruction* parameter = computation->parameter_instruction(i); TF_RETURN_IF_ERROR( ShapeUtil::ValidateShapeWithOptionalLayout(layout.parameter_shape(i))); - TF_RETURN_IF_ERROR(VerifyNotSparse(layout.parameter_shape(i))); if (!ShapeUtil::Compatible(parameter->shape(), layout.parameter_shape(i))) { return InternalError( "Shape of the entry computation parameter %d is %s should be " @@ -1333,37 +1317,24 @@ Status VerifyLayoutConstrainedAllReduce(const HloModule& module) { return Status::OK(); } -// Checks various invariants of send and recv instructions. -Status VerifySendsAndRecvs(const HloModule& module) { - absl::flat_hash_map host_channels; - // Host send/recv instructions must have their own unique channel. 
- auto check_unique_host_channel = [&](const HloInstruction* instruction) { - const HloSendRecvInstruction* sendrecv = - DynCast(instruction); - if (sendrecv->is_host_transfer()) { - auto it_inserted = - host_channels.insert({*sendrecv->channel_id(), sendrecv}); - if (!it_inserted.second) { - return FailedPrecondition( - "Channel %d is used for multiple host send/recv instructions: " - "%s " - "and " - "%s", - *sendrecv->channel_id(), sendrecv->ToString(), - it_inserted.first->second->ToString()); - } - } - - return Status::OK(); - }; +// Checks various invariants of channel instructions (send/recv and +// collectives). +Status VerifyChannels(const HloModule& module) { + absl::flat_hash_map> + channel_instructions; // Send/Recv instruction must have a single user: the corresponding // SendDone/RecvDone. with matching channel. for (const HloComputation* computation : module.computations()) { for (const HloInstruction* instruction : computation->instructions()) { + auto channel_instr = DynCast(instruction); + if (!channel_instr || !channel_instr->channel_id()) { + continue; + } + channel_instructions[*channel_instr->channel_id()].push_back(instruction); + switch (instruction->opcode()) { case HloOpcode::kSend: { - TF_RETURN_IF_ERROR(check_unique_host_channel(instruction)); TF_RET_CHECK(instruction->users().size() == 1); const HloInstruction* send_done = instruction->users().front(); TF_RET_CHECK(send_done->opcode() == HloOpcode::kSendDone); @@ -1372,7 +1343,6 @@ Status VerifySendsAndRecvs(const HloModule& module) { break; } case HloOpcode::kRecv: { - TF_RETURN_IF_ERROR(check_unique_host_channel(instruction)); TF_RET_CHECK(instruction->users().size() == 1); const HloInstruction* recv_done = instruction->users().front(); TF_RET_CHECK(recv_done->opcode() == HloOpcode::kRecvDone); @@ -1393,6 +1363,39 @@ Status VerifySendsAndRecvs(const HloModule& module) { } } } + + // Iterate over each channel to check invariants. 
+ for (auto& pair : channel_instructions) { + auto& instructions = pair.second; + const HloInstruction* first = instructions[0]; + auto sendrecv = DynCast(first); + if (sendrecv) { + absl::flat_hash_set opcodes; + for (const HloInstruction* instr : instructions) { + opcodes.insert(instr->opcode()); + auto cast = DynCast(instr); + TF_RET_CHECK(cast != nullptr) + << "channel " << pair.first + << " is used for different types of channel instructions"; + } + if (sendrecv->is_host_transfer()) { + TF_RET_CHECK(instructions.size() == 2) + << "channel " << pair.first + << " is used for multiple host send/recv instructions"; + } else { + TF_RET_CHECK(instructions.size() == opcodes.size()) + << "channel " << pair.first + << " is used for multiple send/recv instructions"; + } + } else { + for (const HloInstruction* instr : instructions) { + TF_RET_CHECK(first->opcode() == instr->opcode()) + << "channel " << pair.first + << " is used for different types of channel instructions"; + } + } + } + return Status::OK(); } @@ -1696,7 +1699,7 @@ StatusOr HloVerifier::Run(HloModule* module) { TF_RETURN_IF_ERROR(VerifyHloStructure(module)); TF_RETURN_IF_ERROR(VerifyAsynchronousCopies(*module)); - TF_RETURN_IF_ERROR(VerifySendsAndRecvs(*module)); + TF_RETURN_IF_ERROR(VerifyChannels(*module)); std::unique_ptr shape_verifier = target_metadata_->GetVerifier(); diff --git a/tensorflow/compiler/xla/service/hlo_verifier_test.cc b/tensorflow/compiler/xla/service/hlo_verifier_test.cc index 1b273909991..c174af6dec0 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier_test.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier_test.cc @@ -1013,5 +1013,56 @@ TEST_F(HloVerifierTest, AllReduceVerifier) { HasSubstr("mix of layout constrained and unconstrained AllReduce")); } +TEST_F(HloVerifierTest, ChannelVerifier) { + const char* const kModuleStr = R"( + HloModule test + + add { + lhs = f32[] parameter(0) + rhs = f32[] parameter(1) + ROOT add = f32[] add(lhs, rhs) + } + + ENTRY entry { + %input = f32[8,12] parameter(0) + %token0 = token[] after-all() + %send = (f32[8,12], u32[], token[]) send(%input, %token0), channel_id=1 + %send-done = token[] send-done(%send), channel_id=1 + %crs = f32[8,12] all-reduce(%input), replica_groups={}, to_apply=add, + channel_id=1 + ROOT result = (f32[8,12]{0,1}, f32[8,12]{0,1}) tuple(%input, %crs) + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnUnverifiedModule(kModuleStr)); + EXPECT_THAT(verifier().Run(module.get()).status().error_message(), + HasSubstr("used for different types of channel instructions")); +} + +TEST_F(HloVerifierTest, CollectiveChannelVerifier) { + const char* const kModuleStr = R"( + HloModule test + + add { + lhs = f32[] parameter(0) + rhs = f32[] parameter(1) + ROOT add = f32[] add(lhs, rhs) + } + + ENTRY entry { + %input = f32[8,12] parameter(0) + %permute = f32[8,12] collective-permute(%input), + source_target_pairs={{0,1},{1,0}}, channel_id=1 + %crs = f32[8,12] all-reduce(%input), replica_groups={}, to_apply=add, + channel_id=1 + ROOT result = (f32[8,12]{0,1}, f32[8,12]{0,1}) tuple(%permute, %crs) + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnUnverifiedModule(kModuleStr)); + EXPECT_THAT(verifier().Run(module.get()).status().error_message(), + HasSubstr("used for different types of channel instructions")); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/llvm_ir/BUILD b/tensorflow/compiler/xla/service/llvm_ir/BUILD index f0c29efffde..39399df7ad8 100644 --- 
a/tensorflow/compiler/xla/service/llvm_ir/BUILD +++ b/tensorflow/compiler/xla/service/llvm_ir/BUILD @@ -42,7 +42,7 @@ cc_library( "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/strings", - "@llvm//:core", + "@llvm-project//llvm:core", ], ) @@ -78,10 +78,10 @@ cc_library( "@com_google_absl//absl/base", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:span", - "@llvm//:core", - "@llvm//:support", - "@llvm//:target", - "@llvm//:transform_utils", + "@llvm-project//llvm:core", + "@llvm-project//llvm:support", + "@llvm-project//llvm:target", + "@llvm-project//llvm:transform_utils", ], ) @@ -100,7 +100,7 @@ cc_library( "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:span", - "@llvm//:core", + "@llvm-project//llvm:core", ], ) @@ -118,7 +118,7 @@ cc_library( "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:span", - "@llvm//:core", + "@llvm-project//llvm:core", ], ) @@ -136,7 +136,7 @@ cc_library( "//tensorflow/compiler/xla:xla_data_proto_cc", "//tensorflow/core:lib", "@com_google_absl//absl/strings:str_format", - "@llvm//:core", + "@llvm-project//llvm:core", ], ) @@ -161,7 +161,7 @@ cc_library( "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/types:optional", "@com_google_absl//absl/types:span", - "@llvm//:core", + "@llvm-project//llvm:core", ], ) @@ -200,8 +200,8 @@ cc_library( "//tensorflow/core:lib", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:span", - "@llvm//:core", - "@llvm//:support", + "@llvm-project//llvm:core", + "@llvm-project//llvm:support", ], ) @@ -217,7 +217,7 @@ cc_library( "//tensorflow/compiler/xla:xla_data_proto_cc", "//tensorflow/core:lib", "@com_google_absl//absl/types:span", - "@llvm//:core", + "@llvm-project//llvm:core", ], ) @@ -229,7 +229,7 @@ cc_library( ":llvm_loop", ":llvm_util", "@com_google_absl//absl/strings", - "@llvm//:core", + "@llvm-project//llvm:core", ], ) @@ -249,7 +249,7 @@ cc_library( hdrs = ["math_ops.h"], deps = [ ":llvm_util", - "@llvm//:core", + "@llvm-project//llvm:core", ], ) @@ -258,6 +258,6 @@ cc_library( srcs = [], hdrs = ["ir_builder_mixin.h"], deps = [ - "@llvm//:core", + "@llvm-project//llvm:core", ], ) diff --git a/tensorflow/compiler/xla/service/memory_space_assignment.cc b/tensorflow/compiler/xla/service/memory_space_assignment.cc index caf8fce0f2e..4c56bc55609 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment.cc +++ b/tensorflow/compiler/xla/service/memory_space_assignment.cc @@ -91,6 +91,11 @@ bool InstructionCountPrefetchIntervalPicker::CanAllocateInAlternateMemoryNoCopy( return end_time - start_time <= max_overlap_count_; } +int64 InstructionCountPrefetchIntervalPicker::PreferredEvictionEndTime( + const Shape& shape, int64 start_time, int64 latest_end_time) const { + return std::min(start_time + min_overlap_count_, latest_end_time); +} + void InstructionCountPrefetchIntervalPicker::Begin(const HloUse& use, int64 start_time, int64 end_time) { @@ -153,6 +158,21 @@ bool CostAnalysisPrefetchIntervalPicker::CanAllocateInAlternateMemoryNoCopy( logical_interval_elapsed; } +int64 CostAnalysisPrefetchIntervalPicker::PreferredEvictionEndTime( + const Shape& shape, int64 start_time, int64 latest_end_time) const { + float async_copy_elapsed = cost_analysis_.GetAsyncCopyElapsed(shape); + int64 end_time; + for (end_time = start_time + 1; end_time <= latest_end_time; ++end_time) { 
+ float logical_interval_elapsed = + GetLogicalIntervalElapsed(start_time, end_time); + if (logical_interval_elapsed >= + min_async_copy_to_overlap_ratio_ * async_copy_elapsed) { + break; + } + } + return end_time; +} + void CostAnalysisPrefetchIntervalPicker::Begin(const HloUse& use, int64 start_time, int64 end_time) { @@ -337,8 +357,7 @@ HeapSimulator::Result AlternateMemoryBestFitHeap::Finish() { absl::make_unique( value->defining_instruction(), value->defining_position(), aliased_allocation->memory_space(), aliased_allocation->chunk(), - aliased_allocation->start_time(), - aliased_allocation->end_time())); + definition_time, definition_time)); } // Iterate over the uses. @@ -418,6 +437,28 @@ HeapSimulator::Result AlternateMemoryBestFitHeap::Finish() { return result_; } +bool operator<(const AsynchronousCopy& a, const AsynchronousCopy& b) { + return (a.start_time < b.start_time && a.end_time <= b.end_time) || + (a.start_time <= b.start_time && a.end_time < b.end_time); +} + +void AsynchronousCopyOrdering::AddCopy(const AsynchronousCopy& copy) { + auto it_and_inserted = ranges_.insert(copy); + CHECK(it_and_inserted.second || + it_and_inserted.first->start_time == copy.start_time); +} + +bool AsynchronousCopyOrdering::ViolatesOrdering(int64 start_time, + int64 end_time) const { + // We allow identical start and end times. It is enough to check for just the + // start time in case we find a match in ranges_ because the found value will + // either be identical to {start_time, end_time} (and this doesn't violate) or + // its start_time will be smaller and end_time will be larger (this violates). + auto copy_it = ranges_.find( + {start_time, end_time, MemorySpaceAssignment::MemorySpace::kAlternate}); + return copy_it != ranges_.end() && copy_it->start_time != start_time; +} + void AlternateMemoryBestFitHeap::AddInputAndOutputRequiredAssignments() { // Go through the parameters and outputs and pin them to the corresponding // memory by adding a required assignment. @@ -520,14 +561,7 @@ void AlternateMemoryBestFitHeap::CommitPendingChunks() { kDummyChunk); } if (interval.destination == MemorySpace::kAlternate) { - // If there is already an asynchronous copy ending the same time, pick - // the earliest copy start time. - auto range_it = async_copy_range_map_.find(interval.end_time); - if (range_it != async_copy_range_map_.end()) { - range_it->second = std::min(range_it->second, interval.start_time); - } else { - async_copy_range_map_[interval.end_time] = interval.start_time; - } + async_copy_ordering_.AddCopy(interval); } } pending_async_copies_.clear(); @@ -627,48 +661,68 @@ bool AlternateMemoryBestFitHeap::FindAllocation( } } - // Since copies couldn't be removed, create an allocation in the default - // memory space. - if (prev_allocation_in_default_mem != nullptr) { - if (prev_allocation == prev_allocation_in_default_mem) { - // The latest allocation is also in the default memory, simply extend - // that. - prev_allocation->Extend(end_time); - } else { - // The latest allocation is different. Create a new allocation in default - // memory. 
- allocations->push_back( - absl::make_unique( - non_bitcast_operand, defining_position, MemorySpace::kDefault, - kDummyChunk, prev_allocation_in_default_mem->end_time(), - end_time)); - } - } else if (prev_allocation != nullptr && - prev_allocation->memory_space() == MemorySpace::kAlternate && - prev_allocation->defining_position() == defining_position) { + if (prev_allocation_in_default_mem == nullptr && prev_allocation != nullptr && + prev_allocation->memory_space() == MemorySpace::kAlternate && + prev_allocation->defining_position() == defining_position) { // If there was an allocation for this HloValue that was in the alternate // memory space, we also need to perform an eviction. - // TODO(berkin): For now evictions happen relative to the most recent - // allocation in the alternate memory. We can potentially start evictions - // earlier and end later. + int64 eviction_start_time = prev_allocation->start_time(); + int64 eviction_end_time = prev_allocation->end_time(); + CHECK(eviction_start_time <= eviction_end_time); + + int64 preferred_eviction_end_time = std::max( + options_.prefetch_interval_picker->PreferredEvictionEndTime( + non_bitcast_operand->shape(), eviction_start_time, end_time), + eviction_end_time); + + BufferInterval eviction_mem_interval; + eviction_mem_interval.buffer = buffer; + eviction_mem_interval.size = size; + // Try to reserve a buffer from the end of the previous allocation to the + // preferred eviction end time. + eviction_mem_interval.start = prev_allocation->end_time() + 1; + eviction_mem_interval.end = preferred_eviction_end_time; + int64 preferred_offset = prev_allocation->chunk().offset; + VLOG(4) << "Eviction (" << eviction_start_time << ", " << eviction_end_time + << ") preferred end time = " << preferred_eviction_end_time; + + while (preferred_eviction_end_time > eviction_end_time) { + ChunkCandidate chunk_candidate = + FindChunkCandidate(eviction_mem_interval, preferred_offset); + if (chunk_candidate.chunk.offset == preferred_offset) { + eviction_end_time = preferred_eviction_end_time; + AddToPendingChunks(eviction_mem_interval, chunk_candidate); + break; + } + eviction_mem_interval.end = --preferred_eviction_end_time; + } + VLOG(3) << "Evicting buffer at " << prev_allocation->chunk().offset << " (" - << prev_allocation->start_time() << ", " - << prev_allocation->end_time() << ")"; + << eviction_start_time << ", " << eviction_end_time << ")"; + + bool eviction_interval_too_short = + (eviction_start_time == eviction_end_time); + bool eviction_violates_outstanding_copies = + ViolatesMaximumOutstandingAsyncCopies(eviction_start_time, + eviction_end_time); // See if this interval would violate the asynchronous copy limit. 
- if (!ViolatesMaximumOutstandingAsyncCopies(prev_allocation->start_time(), - prev_allocation->end_time())) { + if (!eviction_interval_too_short && !eviction_violates_outstanding_copies) { + prev_allocation->Extend(eviction_end_time); AddAsyncCopy(*prev_allocation, MemorySpace::kDefault, kDummyChunk, - prev_allocation->start_time(), prev_allocation->end_time(), - prev_allocation->end_time(), allocations); - + eviction_start_time, prev_allocation->end_time(), + eviction_end_time, allocations); } else { - VLOG(3) << "This violates the maximum async copies."; + if (eviction_violates_outstanding_copies) { + VLOG(3) << "This violates the maximum async copies."; + } else { + VLOG(3) << "Eviction interval is too short (" << eviction_start_time + << ", " << eviction_end_time << ")."; + } // If the original interval violated the limit, try sub-intervals within // this interval. bool eviction_scheduled = false; - for (int64 time = prev_allocation->start_time(); - time <= prev_allocation->end_time(); ++time) { + for (int64 time = eviction_start_time; time < eviction_end_time; ++time) { VLOG(3) << "Try evicting (" << time << ", " << time << ")"; if (!ViolatesMaximumOutstandingAsyncCopies(time, time)) { VLOG(3) << "Eviction successful."; @@ -686,25 +740,31 @@ bool AlternateMemoryBestFitHeap::FindAllocation( << " because we hit the limit of maximum asynchronous copies " << "between " << hlo_live_range_.flattened_instruction_sequence() - .instructions()[prev_allocation->start_time()] + .instructions()[eviction_start_time] << " and " << hlo_live_range_.flattened_instruction_sequence() - .instructions()[prev_allocation->end_time()]; + .instructions()[eviction_end_time]; return false; } } - } else { + prev_allocation_in_default_mem = allocations->back().get(); + } else if (prev_allocation_in_default_mem == nullptr) { allocations->push_back(absl::make_unique( non_bitcast_operand, defining_position, MemorySpace::kDefault, kDummyChunk, start_time, end_time)); + prev_allocation_in_default_mem = allocations->back().get(); } + CHECK_NE(prev_allocation_in_default_mem, nullptr); + CHECK(prev_allocation_in_default_mem->memory_space() == + MemorySpace::kDefault); + // If the use requires the buffer to be in default memory, don't try to // prefetch. if (use_requires_buffer_in_default_mem) { VLOG(4) << "Not trying to prefetch because use requires buffer in default mem."; - allocations->back()->AddUse(use); + prev_allocation_in_default_mem->AddUse(use); return true; } @@ -736,8 +796,8 @@ bool AlternateMemoryBestFitHeap::FindAllocation( VLOG(4) << "This would violate the outstanding async copy limit."; continue; } - if (ViolatesAsynchronousCopyOrdering(alternate_mem_interval.start, - alternate_mem_interval.end)) { + if (async_copy_ordering_.ViolatesOrdering(alternate_mem_interval.start, + alternate_mem_interval.end)) { VLOG(4) << "This would violate asynchronous copy ordering."; continue; } @@ -754,7 +814,7 @@ bool AlternateMemoryBestFitHeap::FindAllocation( << options_.prefetch_interval_picker->ToDebugString(); AddToPendingChunks(alternate_mem_interval, chunk_candidate); - AddAsyncCopy(*allocations->back().get(), MemorySpace::kAlternate, + AddAsyncCopy(*prev_allocation_in_default_mem, MemorySpace::kAlternate, chunk_candidate.chunk, alternate_mem_interval.start, end_time, latest_prefetch_time, allocations); @@ -763,8 +823,9 @@ bool AlternateMemoryBestFitHeap::FindAllocation( } } - // If a copy wasn't inserted, then add this use to the latest allocation. 
- allocations->back()->AddUse(use); + // If a copy wasn't inserted, then add this use to the latest allocation in + // default memory. + prev_allocation_in_default_mem->AddUse(use); return true; } @@ -812,13 +873,6 @@ bool AlternateMemoryBestFitHeap::ViolatesMaximumOutstandingAsyncCopies( return num_async_copies + 1 > options_.max_outstanding_async_copies; } -bool AlternateMemoryBestFitHeap::ViolatesAsynchronousCopyOrdering( - int64 start_time, int64 end_time) const { - auto async_copy_range_it = async_copy_range_map_.lower_bound(end_time); - return async_copy_range_it != async_copy_range_map_.end() && - async_copy_range_it->second < start_time; -} - bool AlternateMemoryBestFitHeap::TryAllocatingInAlternateMemoryNoCopy( int64 start_time, int64 end_time, int64 last_use_time, HloPosition defining_position, HloUse use, @@ -844,7 +898,7 @@ bool AlternateMemoryBestFitHeap::TryAllocatingInAlternateMemoryNoCopy( } if (!options_.prefetch_interval_picker->CanAllocateInAlternateMemoryNoCopy( - non_bitcast_operand->shape(), start_time, end_time)) { + non_bitcast_operand->shape(), start_time + 1, end_time)) { return false; } @@ -1032,6 +1086,10 @@ MemorySpaceAssignment::Run(HloModule* module, const Options& options) { VLOG(1) << "Maximum number of outstanding async copies: " << CountMaximumOutstandingAsyncCopies(*module); + if (options.verify || VLOG_IS_ON(1)) { + TF_RETURN_IF_ERROR(memory_space_assignment.Verify()); + } + return std::move(memory_space_assignment.preset_assignments_); } @@ -1313,6 +1371,13 @@ void MemorySpaceAssignment::EnsureInstructionAndOperandsInserted( return; } for (HloInstruction* operand : new_instruction->operands()) { + // CopyStart/CopyDone dependencies should always be already inserted; it is + // a red flag when they haven't already been inserted. + CHECK((operand->opcode() != HloOpcode::kCopyStart && + operand->opcode() != HloOpcode::kCopyDone) || + inserted_instructions->contains(operand)) + << "Inserted instruction " << new_instruction->ToString() + << " has un-inserted dependency: " << operand->ToString(); EnsureInstructionAndOperandsInserted(operand, new_sequence, inserted_instructions); } @@ -1404,10 +1469,14 @@ Status MemorySpaceAssignment::FixSchedule() { } HloInstruction* instruction = flattened_instructions_[instruction_index]; // Insert only if it is not deleted (SimplifyGraph sets it to nullptr if - // it was deleted) and not previously inserted. + // it was deleted) and not previously inserted. Also bitcasts and tuples + // are treated specially and only inserted as a result of operand + // dependencies. 
if (instruction != nullptr && !inserted_instructions.contains(instruction) && - instruction->parent() == computation) { + instruction->parent() == computation && + instruction->opcode() != HloOpcode::kBitcast && + instruction->opcode() != HloOpcode::kTuple) { EnsureInstructionAndOperandsInserted(instruction, &new_sequence, &inserted_instructions); } @@ -1435,4 +1504,62 @@ Status MemorySpaceAssignment::FixSchedule() { return Status::OK(); } +Status MemorySpaceAssignment::Verify() const { + VLOG(3) << "Verifying:"; + TF_ASSIGN_OR_RETURN(std::unique_ptr alias_analysis, + HloAliasAnalysis::Run(module_)); + TF_ASSIGN_OR_RETURN(std::unique_ptr hlo_live_range, + HloLiveRange::Run(module_->schedule(), *alias_analysis, + module_->entry_computation())); + + BufferIntervalTree interval_tree; + absl::flat_hash_set seen_buffers; + + for (const auto& position_and_chunk : preset_assignments_->chunks()) { + const HloPosition& position = position_and_chunk.first; + const Chunk& chunk = position_and_chunk.second; + const HloBuffer& buffer = + alias_analysis->GetUniqueBufferAt(position.instruction, position.index); + if (seen_buffers.contains(buffer.id())) { + continue; + } + seen_buffers.insert(buffer.id()); + + int64 start_time = INT64_MAX; + int64 end_time = -1; + for (const HloValue* value : buffer.values()) { + const HloLiveRange::TimeBound& time_bound = + hlo_live_range->buffer_live_ranges().at(value); + VLOG(3) << " value: " << value->ToShortString() << " (" + << time_bound.start << ", " << time_bound.end << ")"; + start_time = std::min(start_time, time_bound.start); + end_time = std::max(end_time, time_bound.end); + } + CHECK_GE(start_time, 0); + CHECK_GT(end_time, 0); + // Get the chunks overlapping in time and search if they overlap in space as + // well. + // TODO(berkin): For now checking against end_time - 1 (exclusive), but we + // really should check against end_time (inclusive) for cases where the + // operand can't share buffer with user (see + // HloDataflowAnalysis::CanShareOperandBufferWithUser). + for (const Chunk& overlapping_chunk : + interval_tree.ChunksOverlappingInTime(start_time, end_time - 1)) { + if (chunk.OverlapsWith(overlapping_chunk)) { + return InternalError( + ("Buffer %s (%d, %d) off: %d size: %d overlaps with another chunk" + " off: %d size: %d"), + buffer.ToString(), start_time, end_time, chunk.offset, chunk.size, + overlapping_chunk.offset, overlapping_chunk.size); + } + } + interval_tree.Add(start_time, end_time - 1, chunk); + VLOG(3) << " buffer: " << buffer.ToString() << ": (" << start_time << ", " + << end_time << ") off: " << position_and_chunk.second.offset + << ", size: " << position_and_chunk.second.size; + } + + return Status::OK(); +} + } // namespace xla diff --git a/tensorflow/compiler/xla/service/memory_space_assignment.h b/tensorflow/compiler/xla/service/memory_space_assignment.h index 67ced4c4909..d83e888f5ab 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment.h +++ b/tensorflow/compiler/xla/service/memory_space_assignment.h @@ -123,6 +123,11 @@ class PrefetchIntervalPicker { int64 start_time, int64 end_time) const = 0; + // Returns the preferred end time for an eviction that starts at a given time + // and must end by the given end time. + virtual int64 PreferredEvictionEndTime(const Shape& shape, int64 start_time, + int64 latest_end_time) const = 0; + // Begins the iterator for the first start time of the prefetch. 
   // Begins the iterator for the first start time of the prefetch.
   virtual void Begin(const HloUse& use, int64 start_time, int64 end_time) = 0;
@@ -166,6 +171,9 @@ class InstructionCountPrefetchIntervalPicker : public PrefetchIntervalPicker {
   bool CanAllocateInAlternateMemoryNoCopy(const Shape& shape, int64 start_time,
                                           int64 end_time) const override;
 
+  int64 PreferredEvictionEndTime(const Shape& shape, int64 start_time,
+                                 int64 latest_end_time) const override;
+
   void Begin(const HloUse& use, int64 start_time, int64 end_time) override;
 
   int64 Next() override;
@@ -206,6 +214,9 @@ class CostAnalysisPrefetchIntervalPicker : public PrefetchIntervalPicker {
   bool CanAllocateInAlternateMemoryNoCopy(const Shape& shape, int64 start_time,
                                           int64 end_time) const override;
 
+  int64 PreferredEvictionEndTime(const Shape& shape, int64 start_time,
+                                 int64 latest_end_time) const override;
+
   void Begin(const HloUse& use, int64 start_time, int64 end_time) override;
 
   int64 Next() override;
@@ -288,6 +299,10 @@ class MemorySpaceAssignment {
     // If true, tries allocating buffers across (e.g., before and inside a while
     // loop body) sequential calls (kWhile, kCall, and kConditional).
     bool allocate_across_sequential_calls = false;
+
+    // If true, verifies the memory space assignment against overlapping
+    // buffers.
+    bool verify = false;
   };
 
   // This class represents an allocation that might either be in the default or
@@ -460,6 +475,9 @@ class MemorySpaceAssignment {
   static BufferIntervalCompare GetMemoryBoundednessBufferIntervalCompare(
       const MemorySpaceAssignmentCostAnalysis& cost_analysis);
 
+  // Verify that the memory space assignment is free of overlapping buffers.
+  Status Verify() const;
+
  private:
   MemorySpaceAssignment(HloModule* module, int64 alternate_memory_space,
                         const HloLiveRange& hlo_live_range)
@@ -526,6 +544,48 @@ struct RequiredMemoryAssignment {
   int64 time;
 };
 
+// A struct representing an asynchronous copy with its logical start and end
+// time and its destination memory space.
+struct AsynchronousCopy {
+  int64 start_time;
+  int64 end_time;
+  MemorySpaceAssignment::MemorySpace destination;
+};
+
+// Compare asynchronous copies such that an earlier start time has the same or
+// earlier end time and an earlier end time has the same or earlier start time.
+bool operator<(const AsynchronousCopy& a, const AsynchronousCopy& b);
+
+// Helper class to enforce asynchronous copy ordering. We only allow
+// asynchronous copies that are pipelined: if an asynchronous copy ends earlier
+// than another asynchronous copy, it must start the same time or earlier than
+// the other asynchronous copy; and if an asynchronous copy starts earlier than
+// another asynchronous copy, it must end the same time or earlier than the
+// other asynchronous copy.
+class AsynchronousCopyOrdering {
+ public:
+  AsynchronousCopyOrdering() = default;
+
+  // Adds an asynchronous copy.
+  void AddCopy(const AsynchronousCopy& copy);
+
+  // Returns true if the addition of an asynchronous copy in the given time
+  // interval would violate the asynchronous copy ordering. E.g., consider the
+  // following scenario:
+  //                                 CS          CD
+  //  already committed async copy:  +-----------+
+  //               new async copy:     +--------+
+  //
+  // The new asynchronous copy would violate the ordering guarantee because the
+  // copy start is after an already committed asynchronous copy while its copy
+  // done is before the committed copy.
+  bool ViolatesOrdering(int64 start_time, int64 end_time) const;
+
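To make the pipelining invariant concrete, here is a small self-contained sketch of how a tree set can answer the violation query in O(log n). It is an illustration under one assumption (the patch's actual operator< and ViolatesOrdering bodies are in the .cc file, which this diff does not show): define operator< so that two copies that cross without nesting compare equivalent; then a std::set find() hit that is not the identical interval is exactly an ordering violation. The behavior below matches the AsynchronousCopyOrderingTest expectations later in this diff.

    #include <cstdint>
    #include <set>

    struct Copy {
      int64_t start_time;
      int64_t end_time;
    };

    // Strict weak ordering: a < b only if a is componentwise no later than b
    // and strictly earlier in at least one endpoint. Copies that strictly
    // "cross" (one starts earlier but ends later) compare equivalent, which is
    // exactly the violating configuration.
    bool operator<(const Copy& a, const Copy& b) {
      return (a.start_time < b.start_time && a.end_time <= b.end_time) ||
             (a.start_time <= b.start_time && a.end_time < b.end_time);
    }

    class CopyOrdering {
     public:
      void AddCopy(const Copy& c) { ranges_.insert(c); }

      bool ViolatesOrdering(int64_t start, int64_t end) const {
        auto it = ranges_.find(Copy{start, end});
        // A hit that is not the identical interval means the new copy crosses
        // a committed one, violating the pipelining order. An identical
        // interval is allowed.
        return it != ranges_.end() &&
               (it->start_time != start || it->end_time != end);
      }

     private:
      std::set<Copy> ranges_;
    };

For example, with {3,11}, {1,8}, {5,14}, and {7,14} committed, probing (2,16) or (9,12) finds an equivalent (crossing) element and reports a violation, while (5,13) and a repeated (5,14) do not.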
+ private:
+  // Stores asynchronous copies in a tree set respecting the pipelining order.
+  std::set<AsynchronousCopy> ranges_;
+};
+
 // This class inherits from GlobalDecreasingSizeBestFitHeap with a notion of
 // maximum size.
 class AlternateMemoryBestFitHeap : public GlobalDecreasingSizeBestFitHeap {
@@ -551,14 +611,6 @@ class AlternateMemoryBestFitHeap : public GlobalDecreasingSizeBestFitHeap {
   HeapSimulator::Result Finish() override;
 
  private:
-  // A struct representing an asynchronous copy with its logical start and end
-  // time and its destination memory space.
-  struct AsynchronousCopy {
-    int64 start_time;
-    int64 end_time;
-    MemorySpace destination;
-  };
-
   // Finds an allocation for the given interval. Internally, it will attempt to
   // find a suitable chunk candidate within the heap size and prefetch interval
   // limits, and append the new allocation(s) to allocations. The new
@@ -603,18 +655,6 @@ class AlternateMemoryBestFitHeap : public GlobalDecreasingSizeBestFitHeap {
   bool ViolatesMaximumOutstandingAsyncCopies(int64 start_time,
                                              int64 end_time) const;
 
-  // Returns true if the addition of an asynchronous copy in the the given time
-  // interval would violate the asynchronous copy ordering. E.g., consider the
-  // following scenario:
-  //                                 CS          CD
-  //  already committed async copy:  +-----------+
-  //               new async copy:     +--------+
-  //
-  // The new asynchronous copy would violate the ordering guarantee because the
-  // copy start is after an already committed asynchronous copy while its copy
-  // done is before the committed copy.
-  bool ViolatesAsynchronousCopyOrdering(int64 start_time, int64 end_time) const;
-
   // Adds an asynchronous copy to the allocations.
   void AddAsyncCopy(const MemorySpaceAssignment::Allocation& prev_allocation,
                     MemorySpace memory_space, Chunk chunk, int64 start_time,
@@ -639,9 +679,7 @@ class AlternateMemoryBestFitHeap : public GlobalDecreasingSizeBestFitHeap {
   // We use a interval tree to keep track of the number of outstanding
   // asynchronous copies.
   BufferIntervalTree async_copy_interval_tree_;
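As a rough illustration of how the interval tree above bounds copies in flight: each asynchronous copy occupies the logical-time interval between its CopyStart and CopyDone, and the check from the .cc hunk earlier in this diff reduces to counting overlaps. This is a sketch for exposition, assuming the ChunksOverlappingInTime query that Verify() also uses; the free-function form and parameter list are not the patch's actual signature.

    // Sketch: a new copy over [start_time, end_time] is rejected if it would
    // push the number of simultaneously outstanding copies over the limit.
    bool ViolatesMaxOutstandingCopies(const BufferIntervalTree& tree,
                                      int64 start_time, int64 end_time,
                                      int64 max_outstanding_async_copies) {
      if (max_outstanding_async_copies < 0) {
        return false;  // A negative limit means unbounded.
      }
      int64 num_async_copies =
          tree.ChunksOverlappingInTime(start_time, end_time).size();
      return num_async_copies + 1 > max_outstanding_async_copies;
    }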
-  // Given the logical time for CopyDone in key, stores the earliest time for
-  // the corresponding CopyStart.
-  std::map<int64, int64> async_copy_range_map_;
+  AsynchronousCopyOrdering async_copy_ordering_;
   std::vector<std::pair<BufferInterval, ChunkCandidate>> pending_chunks_;
   std::vector<AsynchronousCopy> pending_async_copies_;
   // This map contains required memory assignments for HloValues (e.g., input
diff --git a/tensorflow/compiler/xla/service/memory_space_assignment_test.cc b/tensorflow/compiler/xla/service/memory_space_assignment_test.cc
index 238bbed37c4..1d015507867 100644
--- a/tensorflow/compiler/xla/service/memory_space_assignment_test.cc
+++ b/tensorflow/compiler/xla/service/memory_space_assignment_test.cc
@@ -67,9 +67,9 @@ class MemorySpaceAssignmentTest : public HloTestBase,
   std::unique_ptr<PresetAssignments> AssignMemorySpace(
       HloModule* module, int64 max_outstanding_async_copies = -1,
-      int64 max_prefetch_interval = 10) {
+      int64 max_prefetch_interval = 10, int64 min_prefetch_interval = 2) {
     InstructionCountPrefetchIntervalPicker prefetch_interval_picker(
-        /*min_overlap_count=*/2, max_prefetch_interval);
+        min_prefetch_interval, max_prefetch_interval);
     return AssignMemorySpace(module, max_outstanding_async_copies,
                              /*buffer_interval_compare=*/{},
                              &prefetch_interval_picker);
@@ -107,6 +107,7 @@ class MemorySpaceAssignmentTest : public HloTestBase,
     options.is_allowed_in_alternate_mem_fn = is_allowed_in_alternate_mem;
     options.max_outstanding_async_copies = max_outstanding_async_copies;
     options.allocate_across_sequential_calls = GetParam();
+    options.verify = true;
     std::unique_ptr<PresetAssignments> preset_assignments =
         MemorySpaceAssignment::Run(module, options).ValueOrDie();
     CheckPresetAssignments(preset_assignments.get());
@@ -430,6 +431,103 @@ TEST_P(MemorySpaceAssignmentTest, DontEvictWhenThereIsDefaultMemAllocation) {
   EXPECT_THAT(h, op::Multiply(op::Subtract(), op::Multiply()));
 }
 
+TEST_P(MemorySpaceAssignmentTest, EvictAndPrefetchAndPrefetch) {
+  // Test for a memory corruption bug involving the evict/prefetch/prefetch
+  // pattern, where the last prefetch copied from the original buffer in
+  // alternate memory instead of from the evicted buffer.
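+  //
+  // Roughly (diagram added for illustration; see the EXPECT_THATs at the end
+  // of this test):
+  //
+  //   tanh (alternate mem) --evict--> default mem --prefetch--> add0
+  //                                                \-prefetch--> add1
+  //
+  // Both prefetches must copy from the evicted buffer in default memory, not
+  // from the original tanh result in alternate memory.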
+ HloComputation::Builder builder(TestName()); + Shape shape = ShapeUtil::MakeShape(F32, {2, 3}); + HloInstruction* p0 = + builder.AddInstruction(HloInstruction::CreateParameter(0, shape, "p0")); + HloInstruction* p1 = + builder.AddInstruction(HloInstruction::CreateParameter(1, shape, "p1")); + HloInstruction* tanh = builder.AddInstruction( + HloInstruction::CreateUnary(shape, HloOpcode::kTanh, p0)); + HloInstruction* a = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, p0, tanh)); + HloInstruction* b = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kSubtract, p0, p1)); + HloInstruction* c = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kMultiply, p0, p1)); + HloInstruction* d = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kSubtract, p0, p1)); + HloInstruction* e = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kMultiply, a, b)); + HloInstruction* f = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kMultiply, a, c)); + HloInstruction* g = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kMultiply, a, d)); + HloInstruction* h = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kMultiply, b, c)); + HloInstruction* i = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kMultiply, b, d)); + HloInstruction* j = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kMultiply, c, d)); + HloInstruction* k = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, e, f)); + HloInstruction* l = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, g, h)); + HloInstruction* m = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, i, j)); + HloInstruction* n = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, k, l)); + HloInstruction* o = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, n, m)); + HloInstruction* add0 = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, o, tanh)); + HloInstruction* negate0 = builder.AddInstruction( + HloInstruction::CreateUnary(shape, HloOpcode::kNegate, add0)); + HloInstruction* negate1 = builder.AddInstruction( + HloInstruction::CreateUnary(shape, HloOpcode::kNegate, negate0)); + HloInstruction* negate2 = builder.AddInstruction( + HloInstruction::CreateUnary(shape, HloOpcode::kNegate, negate1)); + HloInstruction* negate3 = builder.AddInstruction( + HloInstruction::CreateUnary(shape, HloOpcode::kNegate, negate2)); + HloInstruction* negate4 = builder.AddInstruction( + HloInstruction::CreateUnary(shape, HloOpcode::kNegate, negate3)); + HloInstruction* negate5 = builder.AddInstruction( + HloInstruction::CreateUnary(shape, HloOpcode::kNegate, negate4)); + HloInstruction* negate6 = builder.AddInstruction( + HloInstruction::CreateUnary(shape, HloOpcode::kNegate, negate5)); + HloInstruction* negate7 = builder.AddInstruction( + HloInstruction::CreateUnary(shape, HloOpcode::kNegate, negate6)); + HloInstruction* negate8 = builder.AddInstruction( + HloInstruction::CreateUnary(shape, HloOpcode::kNegate, negate7)); + HloInstruction* negate9 = builder.AddInstruction( + HloInstruction::CreateUnary(shape, HloOpcode::kNegate, negate8)); + HloInstruction* add1 = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, negate9, tanh)); + + auto module = 
CreateNewVerifiedModule(); + HloComputation* computation = module->AddEntryComputation(builder.Build()); + + HloSchedule schedule(module.get()); + schedule.set_sequence( + computation, + {p0, p1, tanh, a, b, c, d, e, + f, g, h, i, j, k, l, m, + n, o, add0, negate0, negate1, negate2, negate3, negate4, + negate5, negate6, negate7, negate8, negate9, add1}); + TF_CHECK_OK(module->set_schedule(schedule)); + + AssignMemorySpace(module.get()); + + // Check that both prefetches (add0 and add1) prefetch from the eviction + // instead of tanh, which will be placed in the alternate memory directly. + EXPECT_THAT( + add0, + op::Add(op::Add(), + op::AsyncCopy(kAlternateMemorySpace, kDefaultMemorySpace, + op::AsyncCopy(kDefaultMemorySpace, + kAlternateMemorySpace, op::Tanh())))); + EXPECT_THAT( + add1, + op::Add(op::Negate(), + op::AsyncCopy(kAlternateMemorySpace, kDefaultMemorySpace, + op::AsyncCopy(kDefaultMemorySpace, + kAlternateMemorySpace, op::Tanh())))); +} + TEST_P(MemorySpaceAssignmentTest, While) { auto module = CreateNewVerifiedModule(); Shape shape = ShapeUtil::MakeShape(xla::F32, {2, 3}); @@ -759,6 +857,77 @@ TEST_P(MemorySpaceAssignmentTest, BitcastTuple) { AssignMemorySpace(module.get()); } +TEST_P(MemorySpaceAssignmentTest, BitcastScheduleBug) { + // Bitcasts can force asynchronous copies to be scheduled too early, possibly + // leading to memory corruption. + // Bug: + // p0------------------>neg-->neg-->neg ... -->neg-->neg-->neg->add + // / + // p1->cs->cd->bitcast-----------------------------------------+ + // + // Expected: + // p0-->neg-->neg-->neg ... -->neg-->neg-->neg------------->add + // / + // p1--------------------->cs----------------->cd->bitcast-+ + HloComputation::Builder builder(TestName()); + Shape shape = ShapeUtil::MakeShape(F32, {2, 3}); + Shape param_shape = ShapeUtil::MakeShape(F32, {6}); + HloInstruction* p0 = + builder.AddInstruction(HloInstruction::CreateParameter(0, shape, "p0")); + HloInstruction* p1 = builder.AddInstruction( + HloInstruction::CreateParameter(1, param_shape, "p1")); + HloInstruction* bitcast = + builder.AddInstruction(HloInstruction::CreateBitcast(shape, p1)); + HloInstruction* negate0 = builder.AddInstruction( + HloInstruction::CreateUnary(shape, HloOpcode::kNegate, p0)); + HloInstruction* negate1 = builder.AddInstruction( + HloInstruction::CreateUnary(shape, HloOpcode::kNegate, negate0)); + HloInstruction* negate2 = builder.AddInstruction( + HloInstruction::CreateUnary(shape, HloOpcode::kNegate, negate1)); + HloInstruction* negate3 = builder.AddInstruction( + HloInstruction::CreateUnary(shape, HloOpcode::kNegate, negate2)); + HloInstruction* negate4 = builder.AddInstruction( + HloInstruction::CreateUnary(shape, HloOpcode::kNegate, negate3)); + HloInstruction* negate5 = builder.AddInstruction( + HloInstruction::CreateUnary(shape, HloOpcode::kNegate, negate4)); + HloInstruction* negate6 = builder.AddInstruction( + HloInstruction::CreateUnary(shape, HloOpcode::kNegate, negate5)); + HloInstruction* negate7 = builder.AddInstruction( + HloInstruction::CreateUnary(shape, HloOpcode::kNegate, negate6)); + HloInstruction* negate8 = builder.AddInstruction( + HloInstruction::CreateUnary(shape, HloOpcode::kNegate, negate7)); + HloInstruction* negate9 = builder.AddInstruction( + HloInstruction::CreateUnary(shape, HloOpcode::kNegate, negate8)); + HloInstruction* add = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, bitcast, negate9)); + + auto module = CreateNewVerifiedModule(); + HloComputation* computation = 
module->AddEntryComputation(builder.Build()); + + HloSchedule schedule(module.get()); + schedule.set_sequence( + computation, {p0, p1, bitcast, negate0, negate1, negate2, negate3, + negate4, negate5, negate6, negate7, negate8, negate9, add}); + TF_CHECK_OK(module->set_schedule(schedule)); + + AssignMemorySpace(module.get(), /*max_outstanding_async_copies=*/-1, + /*max_prefetch_interval=*/5, /*min_prefetch_interval=*/4); + + EXPECT_EQ(bitcast->shape().layout().memory_space(), kAlternateMemorySpace); + const auto& instructions = + module->schedule().sequence(module->entry_computation()).instructions(); + for (int i = 0; i < instructions.size(); ++i) { + // Expect that there is a negate before and after the CopyStart and there is + // a negate before CopyDone. + if (instructions.at(i)->opcode() == HloOpcode::kCopyStart) { + EXPECT_EQ(instructions.at(i - 1)->opcode(), HloOpcode::kNegate); + EXPECT_EQ(instructions.at(i + 1)->opcode(), HloOpcode::kNegate); + } else if (instructions.at(i)->opcode() == HloOpcode::kCopyDone) { + EXPECT_EQ(instructions.at(i - 1)->opcode(), HloOpcode::kNegate); + } + } +} + TEST_P(MemorySpaceAssignmentTest, LastUseOpt) { // Test that checks the last use optimization. It uses two buffers that should // be placed in alternate memory. @@ -2266,5 +2435,38 @@ INSTANTIATE_TEST_SUITE_P(MemorySpaceAssignmentInstantiation, MemorySpaceAssignmentTest, ::testing::Values(false, true)); +using AsynchronousCopyOrderingTest = ::testing::Test; + +TEST_F(AsynchronousCopyOrderingTest, Simple) { + // Given asynchronous copies like the following, ensure the pipelining order + // is maintained (earlier start time must have earlier end time). + // 3,11 +-------+ OK + // 1,8 +------+ OK + // 5,14 +--------+ OK + // 7,14 +------+ OK + // 2,16 +-------------+ Violate + // 9,12 +--+ Violate + // 6,17 +----------+ Violate + // 5,13 +-------+ OK (same start as 5,14) + // 5,14 +--------+ OK (same as 5,14) + auto alternate_mem_space = MemorySpaceAssignment::MemorySpace::kAlternate; + AsynchronousCopyOrdering ordering; + EXPECT_FALSE(ordering.ViolatesOrdering(3, 11)); + ordering.AddCopy({3, 11, alternate_mem_space}); + EXPECT_FALSE(ordering.ViolatesOrdering(1, 8)); + ordering.AddCopy({1, 8, alternate_mem_space}); + EXPECT_FALSE(ordering.ViolatesOrdering(5, 14)); + ordering.AddCopy({5, 14, alternate_mem_space}); + EXPECT_FALSE(ordering.ViolatesOrdering(7, 14)); + ordering.AddCopy({7, 14, alternate_mem_space}); + EXPECT_TRUE(ordering.ViolatesOrdering(2, 16)); + EXPECT_TRUE(ordering.ViolatesOrdering(9, 12)); + EXPECT_TRUE(ordering.ViolatesOrdering(6, 17)); + EXPECT_FALSE(ordering.ViolatesOrdering(5, 13)); + ordering.AddCopy({5, 13, alternate_mem_space}); + EXPECT_FALSE(ordering.ViolatesOrdering(5, 14)); + ordering.AddCopy({5, 14, alternate_mem_space}); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/mlir_gpu/BUILD b/tensorflow/compiler/xla/service/mlir_gpu/BUILD index b687d72d3d9..20b448286d5 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/BUILD +++ b/tensorflow/compiler/xla/service/mlir_gpu/BUILD @@ -37,7 +37,7 @@ cc_library( deps = [ "//tensorflow/compiler/xla/service:hlo", "@com_google_absl//absl/strings", - "@local_config_mlir//:IR", + "@llvm-project//mlir:IR", ], ) @@ -46,8 +46,8 @@ cc_library( srcs = ["inject_errors_pass.cc"], hdrs = ["inject_errors_pass.h"], deps = [ - "@local_config_mlir//:Pass", - "@local_config_mlir//:StandardOps", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:StandardOps", ], ) @@ -81,12 +81,12 @@ cc_library( 
"//tensorflow/stream_executor:stream_executor_headers", "//tensorflow/stream_executor/gpu:asm_compiler", "@com_google_absl//absl/container:flat_hash_map", - "@local_config_mlir//:GPUDialect", - "@local_config_mlir//:IR", - "@local_config_mlir//:LLVMDialect", - "@local_config_mlir//:StandardOps", - "@local_config_mlir//:Support", - "@local_config_mlir//:TargetNVVMIR", + "@llvm-project//mlir:GPUDialect", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:LLVMDialect", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Support", + "@llvm-project//mlir:TargetNVVMIR", ], alwayslink = True, # Contains compiler registration ) @@ -103,9 +103,9 @@ cc_library( "//tensorflow/compiler/xla:status", "//tensorflow/compiler/xla/service:hlo", "@com_google_absl//absl/types:span", - "@llvm//:support", - "@local_config_mlir//:IR", - "@local_config_mlir//:StandardOps", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:StandardOps", ], ) @@ -127,9 +127,9 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/stream_executor:stream_executor_headers", "@com_google_absl//absl/container:flat_hash_map", - "@local_config_mlir//:IR", - "@local_config_mlir//:LLVMDialect", - "@local_config_mlir//:StandardOps", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:LLVMDialect", + "@llvm-project//mlir:StandardOps", ], ) @@ -151,26 +151,26 @@ cc_library( "//tensorflow/compiler/xla:util", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/memory", - "@local_config_mlir//:AffineDialectRegistration", - "@local_config_mlir//:CFGTransforms", - "@local_config_mlir//:GPUDialect", - "@local_config_mlir//:GPUDialectRegistration", - "@local_config_mlir//:GPUToNVVMTransforms", - "@local_config_mlir//:GPUTransforms", - "@local_config_mlir//:IR", - "@local_config_mlir//:LLVMDialect", - "@local_config_mlir//:LLVMTransforms", - "@local_config_mlir//:Linalg", - "@local_config_mlir//:LinalgDialectRegistration", - "@local_config_mlir//:LinalgToLLVM", - "@local_config_mlir//:LoopDialectRegistration", - "@local_config_mlir//:LoopOps", - "@local_config_mlir//:LoopsToGPUPass", - "@local_config_mlir//:NVVMDialect", - "@local_config_mlir//:Pass", - "@local_config_mlir//:StandardDialectRegistration", - "@local_config_mlir//:StandardOps", - "@local_config_mlir//:Transforms", + "@llvm-project//mlir:AffineDialectRegistration", + "@llvm-project//mlir:CFGTransforms", + "@llvm-project//mlir:GPUDialect", + "@llvm-project//mlir:GPUDialectRegistration", + "@llvm-project//mlir:GPUToNVVMTransforms", + "@llvm-project//mlir:GPUTransforms", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:LLVMDialect", + "@llvm-project//mlir:LLVMTransforms", + "@llvm-project//mlir:Linalg", + "@llvm-project//mlir:LinalgDialectRegistration", + "@llvm-project//mlir:LinalgToLLVM", + "@llvm-project//mlir:LoopDialectRegistration", + "@llvm-project//mlir:LoopOps", + "@llvm-project//mlir:LoopsToGPUPass", + "@llvm-project//mlir:NVVMDialect", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:StandardDialectRegistration", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Transforms", ], ) @@ -191,8 +191,8 @@ cc_library( "//tensorflow/core:test", "//tensorflow/core/platform:test", "@com_google_absl//absl/memory", - "@llvm//:support", - "@local_config_mlir//:IR", - "@local_config_mlir//:Pass", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", ], ) diff --git a/tensorflow/compiler/xla/service/mlir_gpu/emission_context.cc 
b/tensorflow/compiler/xla/service/mlir_gpu/emission_context.cc index 08a133a9b52..3c27dc662fe 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/emission_context.cc +++ b/tensorflow/compiler/xla/service/mlir_gpu/emission_context.cc @@ -16,8 +16,8 @@ limitations under the License. #include "tensorflow/compiler/xla/service/mlir_gpu/emission_context.h" #include "absl/strings/substitute.h" -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project #include "tensorflow/compiler/xla/service/hlo_instruction.h" namespace xla { diff --git a/tensorflow/compiler/xla/service/mlir_gpu/emission_context.h b/tensorflow/compiler/xla/service/mlir_gpu/emission_context.h index cbea4c48568..db702dbc014 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/emission_context.h +++ b/tensorflow/compiler/xla/service/mlir_gpu/emission_context.h @@ -18,7 +18,7 @@ limitations under the License. #include -#include "mlir/IR/Diagnostics.h" // TF:local_config_mlir +#include "mlir/IR/Diagnostics.h" // TF:llvm-project #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module.h" diff --git a/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/BUILD b/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/BUILD index eda65583fb5..72acc5463ca 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/BUILD +++ b/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/BUILD @@ -31,11 +31,11 @@ cc_library( "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", "@com_google_absl//absl/types:span", - "@llvm//:support", - "@local_config_mlir//:AffineOps", - "@local_config_mlir//:IR", - "@local_config_mlir//:StandardOps", - "@local_config_mlir//:TransformUtils", + "@llvm-project//llvm:support", + "@llvm-project//mlir:AffineOps", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:TransformUtils", ], ) @@ -50,13 +50,13 @@ tf_cc_test( "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core/platform:test", - "@llvm//:support", - "@local_config_mlir//:AffineDialectRegistration", - "@local_config_mlir//:AffineToStandardTransforms", - "@local_config_mlir//:IR", - "@local_config_mlir//:LLVMTransforms", - "@local_config_mlir//:Pass", - "@local_config_mlir//:StandardDialectRegistration", - "@local_config_mlir//:Transforms", + "@llvm-project//llvm:support", + "@llvm-project//mlir:AffineDialectRegistration", + "@llvm-project//mlir:AffineToStandardTransforms", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:LLVMTransforms", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:StandardDialectRegistration", + "@llvm-project//mlir:Transforms", ], ) diff --git a/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter.cc b/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter.cc index 84e239ae196..4ed8745a251 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter.cc +++ b/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter.cc @@ -30,13 +30,13 @@ limitations under the License. 
#include "absl/types/span.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/raw_ostream.h" -#include "mlir/Dialect/AffineOps/AffineOps.h" // TF:local_config_mlir -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/AffineExpr.h" // TF:local_config_mlir -#include "mlir/IR/AffineMap.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/Transforms/LoopUtils.h" // TF:local_config_mlir -#include "mlir/Transforms/RegionUtils.h" // TF:local_config_mlir +#include "mlir/Dialect/AffineOps/AffineOps.h" // TF:llvm-project +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/AffineExpr.h" // TF:llvm-project +#include "mlir/IR/AffineMap.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/Transforms/LoopUtils.h" // TF:llvm-project +#include "mlir/Transforms/RegionUtils.h" // TF:llvm-project #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" #include "tensorflow/compiler/xla/window_util.h" @@ -117,18 +117,18 @@ bool IsSimpleLoop(mlir::AffineForOp loop) { struct BoundAffineMap { mlir::AffineMap affine_map; - std::vector operands; + std::vector operands; }; BoundAffineMap GetBoundAffineMapFrom(mlir::Operation* op) { if (auto load = mlir::dyn_cast(op)) { return {load.getAffineMap(), - std::vector(load.getMapOperands().begin(), - load.getMapOperands().end())}; + std::vector(load.getMapOperands().begin(), + load.getMapOperands().end())}; } else if (auto store = mlir::dyn_cast(op)) { return {store.getAffineMap(), - std::vector(store.getMapOperands().begin(), - store.getMapOperands().end())}; + std::vector(store.getMapOperands().begin(), + store.getMapOperands().end())}; } else { CHECK(false); } @@ -150,7 +150,7 @@ mlir::Operation* CloneWithNewAffineMap(mlir::Operation* op, } } -void SetMemRef(mlir::Operation* op, mlir::Value* memref) { +void SetMemRef(mlir::Operation* op, mlir::Value memref) { if (auto load = mlir::dyn_cast(op)) { load.setMemRef(memref); } else if (auto store = mlir::dyn_cast(op)) { @@ -257,7 +257,7 @@ mlir::AffineForOp TileLoop(mlir::AffineForOp loop, int64_t size, } for (mlir::IROperand& use : - llvm::make_early_inc_range(loop.getInductionVar()->getUses())) { + llvm::make_early_inc_range(loop.getInductionVar().getUses())) { mlir::Operation* owner = use.getOwner(); BoundAffineMap affine_map = GetBoundAffineMapFrom(owner); unsigned new_dim = affine_map.operands.size(); @@ -325,12 +325,12 @@ mlir::Operation* HoistAndFix(llvm::iplist::iterator begin_op, auto new_alloc = builder.create(builder.getUnknownLoc(), new_type); - std::vector indvars; + std::vector indvars; for (auto ancestor : ancestors) { indvars.push_back(ancestor.getInductionVar()); } for (mlir::IROperand& use : - llvm::make_early_inc_range(alloc.getResult()->getUses())) { + llvm::make_early_inc_range(alloc.getResult().getUses())) { mlir::Operation* owner = use.getOwner(); BoundAffineMap affine_map = GetBoundAffineMapFrom(owner); affine_map.operands.insert(affine_map.operands.begin(), indvars.begin(), @@ -418,7 +418,7 @@ struct InitialMlirConvAnchors { // output[...] 
= output_acc[] // } StatusOr CreateNaiveMlirConv( - mlir::Value* input, mlir::Value* filter, mlir::Value* output, + mlir::Value input, mlir::Value filter, mlir::Value output, const ShapeInfo& input_shape_info, const ShapeInfo& filter_shape_info, const ShapeInfo& output_shape_info, const Window& window, mlir::OpBuilder builder) { @@ -440,7 +440,7 @@ StatusOr CreateNaiveMlirConv( location, builder.create( location, mlir::FloatAttr::get(builder.getF32Type(), 0)), - output_acc, llvm::ArrayRef()); + output_acc, llvm::ArrayRef()); std::vector reduction_loops; reduction_loops = CreateNestedSimpleLoops( @@ -450,11 +450,11 @@ StatusOr CreateNaiveMlirConv( mlir::AffineForOp loop_o = cartesian_product_loops[1]; mlir::AffineForOp loop_c = reduction_loops[0]; - std::vector output_spatial_indvars; + std::vector output_spatial_indvars; for (auto loop : absl::MakeSpan(cartesian_product_loops).subspan(2)) { output_spatial_indvars.push_back(loop.getInductionVar()); } - std::vector filter_spatial_indvars; + std::vector filter_spatial_indvars; for (auto loop : absl::MakeSpan(reduction_loops).subspan(1)) { filter_spatial_indvars.push_back(loop.getInductionVar()); } @@ -463,7 +463,7 @@ StatusOr CreateNaiveMlirConv( builder = reduction_loops.back().getBodyBuilder(); - mlir::Value* loaded_input = [&] { + mlir::Value loaded_input = [&] { std::vector input_indices; input_indices.push_back(builder.getAffineDimExpr(0)); input_indices.push_back(builder.getAffineDimExpr(1)); @@ -479,7 +479,7 @@ StatusOr CreateNaiveMlirConv( builder.getAffineDimExpr(2 + num_spatial_dims + i) - window_dim.padding_low()); } - std::vector input_vars; + std::vector input_vars; input_vars.push_back(loop_n.getInductionVar()); input_vars.push_back(loop_c.getInductionVar()); input_vars.insert(input_vars.end(), output_spatial_indvars.begin(), @@ -499,8 +499,8 @@ StatusOr CreateNaiveMlirConv( builder.getF32Type()); }(); - mlir::Value* loaded_filter = [&] { - std::vector filter_vars; + mlir::Value loaded_filter = [&] { + std::vector filter_vars; filter_vars.push_back(loop_o.getInductionVar()); filter_vars.push_back(loop_c.getInductionVar()); filter_vars.insert(filter_vars.end(), filter_spatial_indvars.begin(), @@ -519,11 +519,11 @@ StatusOr CreateNaiveMlirConv( location, builder.createOrFold(location, output_acc), builder.create(location, loaded_input, loaded_filter)), - output_acc, llvm::ArrayRef()); + output_acc, llvm::ArrayRef()); builder.setInsertionPointAfter(reduction_loops[0]); { - std::vector output_vars; + std::vector output_vars; output_vars.push_back(loop_n.getInductionVar()); output_vars.push_back(loop_o.getInductionVar()); output_vars.insert(output_vars.end(), output_spatial_indvars.begin(), @@ -735,9 +735,9 @@ StatusOr EmitConvolutionForwardAsMlir( builder.create(builder.getUnknownLoc()); builder.setInsertionPointToStart(entry_block); - mlir::Value* input = entry_block->getArgument(1); - mlir::Value* filter = entry_block->getArgument(2); - mlir::Value* output = entry_block->getArgument(0); + mlir::Value input = entry_block->getArgument(1); + mlir::Value filter = entry_block->getArgument(2); + mlir::Value output = entry_block->getArgument(0); TF_RETURN_IF_ERROR(ConvIsImplemented(conv)); diff --git a/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter.h b/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter.h index f0b95876775..5f01dffb756 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter.h +++ 
b/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter.h @@ -16,7 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_EXPERIMENTAL_CONV_EMITTER_CONV_EMITTER_H_ #define TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_EXPERIMENTAL_CONV_EMITTER_CONV_EMITTER_H_ -#include "mlir/IR/Function.h" // TF:local_config_mlir +#include "mlir/IR/Function.h" // TF:llvm-project #include "tensorflow/compiler/xla/service/hlo_instruction.h" namespace xla { diff --git a/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter_test.cc b/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter_test.cc index 00a93455a8b..78cc83dd0bd 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter_test.cc +++ b/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter_test.cc @@ -18,13 +18,13 @@ limitations under the License. #include #include "llvm/Support/raw_ostream.h" -#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h" // TF:local_config_mlir -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassManager.h" // TF:local_config_mlir -#include "mlir/Transforms/Passes.h" // TF:local_config_mlir +#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassManager.h" // TF:llvm-project +#include "mlir/Transforms/Passes.h" // TF:llvm-project #include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/tests/filecheck.h" #include "tensorflow/compiler/xla/tests/verified_hlo_module.h" diff --git a/tensorflow/compiler/xla/service/mlir_gpu/hlo_dialect_emitter.cc b/tensorflow/compiler/xla/service/mlir_gpu/hlo_dialect_emitter.cc index 60b5d086d15..ae3e42bc20d 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/hlo_dialect_emitter.cc +++ b/tensorflow/compiler/xla/service/mlir_gpu/hlo_dialect_emitter.cc @@ -16,10 +16,10 @@ limitations under the License. #include "tensorflow/compiler/xla/service/mlir_gpu/hlo_dialect_emitter.h" #include "llvm/ADT/STLExtras.h" -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project #include "tensorflow/compiler/mlir/xla/hlo_utils.h" #include "tensorflow/compiler/mlir/xla/ir/hlo_ops.h" #include "tensorflow/compiler/xla/comparison_util.h" @@ -43,14 +43,21 @@ using ::mlir::Value; namespace hlo = ::mlir::xla_hlo; // TODO(b/137624192) Use tablegen for this. 
-StatusOr InsertMlirOp( - HloOpcode opcode, OpBuilder func_builder, Location loc, ArrayRef rets, - ArrayRef args, ArrayRef> attrs) { +StatusOr InsertMlirOp(HloOpcode opcode, OpBuilder func_builder, + Location loc, ArrayRef rets, + ArrayRef args, + ArrayRef> attrs) { switch (opcode) { + case HloOpcode::kAbs: + return {func_builder.create(loc, rets, args, attrs)}; case HloOpcode::kAdd: return {func_builder.create(loc, rets, args, attrs)}; case HloOpcode::kAnd: return {func_builder.create(loc, rets, args, attrs)}; + case HloOpcode::kCeil: + return {func_builder.create(loc, rets, args, attrs)}; + case HloOpcode::kCos: + return {func_builder.create(loc, rets, args, attrs)}; case HloOpcode::kDivide: return {func_builder.create(loc, rets, args, attrs)}; case HloOpcode::kExp: @@ -61,10 +68,18 @@ StatusOr InsertMlirOp( return {func_builder.create(loc, rets, args, attrs)}; case HloOpcode::kMultiply: return {func_builder.create(loc, rets, args, attrs)}; + case HloOpcode::kNegate: + return {func_builder.create(loc, rets, args, attrs)}; + case HloOpcode::kRemainder: + return {func_builder.create(loc, rets, args, attrs)}; case HloOpcode::kSelect: return {func_builder.create(loc, rets, args, attrs)}; + case HloOpcode::kSign: + return {func_builder.create(loc, rets, args, attrs)}; case HloOpcode::kSubtract: return {func_builder.create(loc, rets, args, attrs)}; + case HloOpcode::kTanh: + return {func_builder.create(loc, rets, args, attrs)}; default: return tensorflow::errors::Internal(absl::StrCat( "HLO Opcode ", HloOpcodeString(opcode), " is not supported.")); @@ -78,7 +93,7 @@ mlir::Location HloDialectEmitter::getLocation( return emission_context_->getLocation(instr); } -StatusOr HloDialectEmitter::EmitComputation( +StatusOr HloDialectEmitter::EmitComputation( const HloComputation& computation) { const auto root = computation.root_instruction(); TF_RETURN_IF_ERROR(root->Accept(this)); @@ -88,7 +103,7 @@ StatusOr HloDialectEmitter::EmitComputation( Status HloDialectEmitter::DefaultAction(HloInstruction* instr) { TF_ASSIGN_OR_RETURN(auto res_type, ConvertTensorShapeToType( instr->shape(), builder_)); - llvm::SmallVector arguments; + llvm::SmallVector arguments; for (auto operand : instr->operands()) { arguments.push_back(instruction_to_values_[operand]); } @@ -135,7 +150,7 @@ Status HloDialectEmitter::HandleConstant(HloInstruction* constant) { } Status HloDialectEmitter::HandleReduce(HloInstruction* reduce) { - llvm::SmallVector operands; + llvm::SmallVector operands; for (auto operand : reduce->operands()) { operands.push_back(instruction_to_values_.at(operand)); } @@ -152,7 +167,7 @@ Status HloDialectEmitter::HandleReduce(HloInstruction* reduce) { { auto computation = reduce->to_apply(); auto block = new mlir::Block(); - llvm::SmallVector arguments; + llvm::SmallVector arguments; arguments.reserve(computation->num_parameters()); for (auto parameter : computation->parameter_instructions()) { TF_ASSIGN_OR_RETURN(auto param_type, @@ -166,7 +181,7 @@ Status HloDialectEmitter::HandleReduce(HloInstruction* reduce) { OpBuilder body_builder(block); body_builder.setInsertionPointToEnd(block); body_builder.create(getLocation(reduce), - ArrayRef{result}); + ArrayRef{result}); } // TODO(b/137624192) Add support for multiple results. 
instruction_to_values_[reduce] = reduceOp.getResult(0); @@ -180,7 +195,7 @@ Status HloDialectEmitter::HandleCompare(HloInstruction* compare) { "comparison_direction", builder_.getStringAttr( ComparisonDirectionToString(compare->comparison_direction()))); - llvm::SmallVector arguments; + llvm::SmallVector arguments; for (auto operand : compare->operands()) { arguments.push_back(instruction_to_values_[operand]); } diff --git a/tensorflow/compiler/xla/service/mlir_gpu/hlo_dialect_emitter.h b/tensorflow/compiler/xla/service/mlir_gpu/hlo_dialect_emitter.h index 86ed97b3c58..a1ec6d88644 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/hlo_dialect_emitter.h +++ b/tensorflow/compiler/xla/service/mlir_gpu/hlo_dialect_emitter.h @@ -20,10 +20,10 @@ limitations under the License. #include "absl/types/span.h" #include "llvm/ADT/ArrayRef.h" -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project #include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" @@ -37,19 +37,19 @@ class HloDialectEmitter : public DfsHloVisitorWithDefault { public: HloDialectEmitter(xla::mlir_gpu::EmissionContext* emission_context, ::mlir::Region* region, - llvm::ArrayRef<::mlir::Value*> arguments) + llvm::ArrayRef<::mlir::Value> arguments) : emission_context_(emission_context), builder_(region), arguments_(arguments) {} HloDialectEmitter(xla::mlir_gpu::EmissionContext* emission_context, ::mlir::OpBuilder builder, - llvm::ArrayRef<::mlir::Value*> arguments) + llvm::ArrayRef<::mlir::Value> arguments) : emission_context_(emission_context), builder_(builder), arguments_(arguments) {} - StatusOr EmitComputation(const HloComputation& computation); + StatusOr EmitComputation(const HloComputation& computation); Status DefaultAction(HloInstruction* instr) override; Status HandleBroadcast(HloInstruction* broadcast) override; @@ -64,8 +64,8 @@ class HloDialectEmitter : public DfsHloVisitorWithDefault { xla::mlir_gpu::EmissionContext* emission_context_; ::mlir::OpBuilder builder_; - llvm::ArrayRef<::mlir::Value*> arguments_; - absl::flat_hash_map + llvm::ArrayRef<::mlir::Value> arguments_; + absl::flat_hash_map instruction_to_values_; }; diff --git a/tensorflow/compiler/xla/service/mlir_gpu/inject_errors_pass.h b/tensorflow/compiler/xla/service/mlir_gpu/inject_errors_pass.h index 832d43ad562..1e0e41868ca 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/inject_errors_pass.h +++ b/tensorflow/compiler/xla/service/mlir_gpu/inject_errors_pass.h @@ -16,7 +16,7 @@ limitations under the License. 
#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_INJECT_ERRORS_PASS_H_ #define TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_INJECT_ERRORS_PASS_H_ -#include "mlir/Pass/Pass.h" // TF:local_config_mlir +#include "mlir/Pass/Pass.h" // TF:llvm-project namespace mlir { diff --git a/tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.cc b/tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.cc index 186dacc06e6..c878c90ef2a 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.cc +++ b/tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.cc @@ -17,33 +17,32 @@ limitations under the License. #include -#include "absl/container/flat_hash_map.h" #include "absl/memory/memory.h" -#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h" // TF:local_config_mlir -#include "mlir/Conversion/LinalgToLLVM/LinalgToLLVM.h" // TF:local_config_mlir -#include "mlir/Conversion/LoopToStandard/ConvertLoopToStandard.h" // TF:local_config_mlir -#include "mlir/Conversion/LoopsToGPU/LoopsToGPUPass.h" // TF:local_config_mlir -#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h" // TF:local_config_mlir -#include "mlir/Dialect/GPU/GPUDialect.h" // TF:local_config_mlir -#include "mlir/Dialect/GPU/Passes.h" // TF:local_config_mlir -#include "mlir/Dialect/LLVMIR/LLVMDialect.h" // TF:local_config_mlir -#include "mlir/Dialect/LLVMIR/NVVMDialect.h" // TF:local_config_mlir -#include "mlir/Dialect/Linalg/IR/LinalgOps.h" // TF:local_config_mlir -#include "mlir/Dialect/Linalg/Passes.h" // TF:local_config_mlir -#include "mlir/Dialect/LoopOps/LoopOps.h" // TF:local_config_mlir -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/BlockAndValueMapping.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/IR/OperationSupport.h" // TF:local_config_mlir -#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir -#include "mlir/IR/Region.h" // TF:local_config_mlir -#include "mlir/Pass/Pass.h" // TF:local_config_mlir -#include "mlir/Pass/PassManager.h" // TF:local_config_mlir -#include "mlir/Transforms/DialectConversion.h" // TF:local_config_mlir -#include "mlir/Transforms/Passes.h" // TF:local_config_mlir +#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h" // TF:llvm-project +#include "mlir/Conversion/LinalgToLLVM/LinalgToLLVM.h" // TF:llvm-project +#include "mlir/Conversion/LoopToStandard/ConvertLoopToStandard.h" // TF:llvm-project +#include "mlir/Conversion/LoopsToGPU/LoopsToGPUPass.h" // TF:llvm-project +#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h" // TF:llvm-project +#include "mlir/Dialect/GPU/GPUDialect.h" // TF:llvm-project +#include "mlir/Dialect/GPU/Passes.h" // TF:llvm-project +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" // TF:llvm-project +#include "mlir/Dialect/LLVMIR/NVVMDialect.h" // TF:llvm-project +#include "mlir/Dialect/Linalg/IR/LinalgOps.h" // TF:llvm-project +#include "mlir/Dialect/Linalg/Passes.h" // TF:llvm-project +#include "mlir/Dialect/LoopOps/LoopOps.h" // TF:llvm-project +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/BlockAndValueMapping.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include 
"mlir/IR/OperationSupport.h" // TF:llvm-project +#include "mlir/IR/PatternMatch.h" // TF:llvm-project +#include "mlir/IR/Region.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:llvm-project +#include "mlir/Pass/PassManager.h" // TF:llvm-project +#include "mlir/Transforms/DialectConversion.h" // TF:llvm-project +#include "mlir/Transforms/Passes.h" // TF:llvm-project #include "tensorflow/compiler/mlir/xla/ir/lhlo_ops.h" #include "tensorflow/compiler/mlir/xla/transforms/passes.h" #include "tensorflow/compiler/mlir/xla/transforms/rewriters.h" @@ -108,8 +107,8 @@ struct FusionOpRemover : public mlir::FunctionPass { struct SingleTripLoopRemoval : public mlir::FunctionPass { void runOnFunction() override { - auto getConstantValue = [](mlir::Value* value) -> llvm::Optional { - auto definingOp = value->getDefiningOp(); + auto getConstantValue = [](mlir::Value value) -> llvm::Optional { + auto definingOp = value.getDefiningOp(); if (!definingOp) return llvm::None; auto constantOp = llvm::dyn_cast(definingOp); if (!constantOp) return llvm::None; @@ -145,7 +144,7 @@ struct SingleTripLoopRemoval // same address with the stored value. This needs generalization. struct StoreForwardingPass : mlir::FunctionPass { void runOnFunction() override { - absl::flat_hash_map memrefToAllocOp; + llvm::DenseMap memrefToAllocOp; getFunction().walk([&](mlir::LoadOp loadOp) { auto* block = loadOp.getOperation()->getBlock(); @@ -180,10 +179,10 @@ struct StoreForwardingPass : mlir::FunctionPass { // Recursively checks defining ops until finds AllocOp. Return either AllocOp // if it is found or nullptr. - mlir::Operation* SearchAllocOp(mlir::Value* memref) { - mlir::Operation* defOp = memref->getDefiningOp(); + mlir::Operation* SearchAllocOp(mlir::Value memref) { + mlir::Operation* defOp = memref.getDefiningOp(); while (auto subviewOp = mlir::dyn_cast_or_null(defOp)) { - defOp = subviewOp.source()->getDefiningOp(); + defOp = subviewOp.source().getDefiningOp(); } if (auto allocOp = mlir::dyn_cast_or_null(defOp)) { return allocOp.getOperation(); @@ -193,8 +192,8 @@ struct StoreForwardingPass : mlir::FunctionPass { // Retrieves AllocOp from the cache or actually looks for it. mlir::Operation* GetAllocOp( - mlir::Value* memref, - absl::flat_hash_map* memrefToAllocOp) { + mlir::Value memref, + llvm::DenseMap* memrefToAllocOp) { auto allocOpIt = memrefToAllocOp->find(memref); if (allocOpIt != memrefToAllocOp->end()) { return allocOpIt->second; @@ -212,7 +211,7 @@ struct StoreForwardingPass : mlir::FunctionPass { struct DeadTempBufferRemoval : mlir::FunctionPass { bool operationConsideredDead(mlir::Operation* op) { for (auto result : op->getResults()) { - if (!llvm::all_of(result->getUsers(), [&](mlir::Operation* op) { + if (!llvm::all_of(result.getUsers(), [&](mlir::Operation* op) { // Store and Dealloc is OK. if (llvm::isa(op) || llvm::isa(op)) { @@ -236,7 +235,7 @@ struct DeadTempBufferRemoval : mlir::FunctionPass { void recursiveErase(mlir::Operation* op) { for (auto result : op->getResults()) { - for (auto user : llvm::make_early_inc_range(result->getUsers())) { + for (auto user : llvm::make_early_inc_range(result.getUsers())) { recursiveErase(user); } } diff --git a/tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.h b/tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.h index 3d4cdf49461..027c3c93dca 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.h +++ b/tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.h @@ -16,7 +16,7 @@ limitations under the License. 
#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_KERNEL_LOWERING_H_ #define TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_KERNEL_LOWERING_H_ -#include "mlir/IR/Module.h" // TF:local_config_mlir +#include "mlir/IR/Module.h" // TF:llvm-project #include "tensorflow/compiler/xla/status.h" #include "tensorflow/compiler/xla/statusor.h" diff --git a/tensorflow/compiler/xla/service/mlir_gpu/lhlo_dialect_emitter.cc b/tensorflow/compiler/xla/service/mlir_gpu/lhlo_dialect_emitter.cc index fd38cd3bf5e..585223efa7b 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/lhlo_dialect_emitter.cc +++ b/tensorflow/compiler/xla/service/mlir_gpu/lhlo_dialect_emitter.cc @@ -15,14 +15,14 @@ limitations under the License. #include "tensorflow/compiler/xla/service/mlir_gpu/lhlo_dialect_emitter.h" -#include "mlir/Dialect/LLVMIR/LLVMDialect.h" // TF:local_config_mlir -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/Identifier.h" // TF:local_config_mlir -#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir -#include "mlir/IR/Types.h" // TF:local_config_mlir +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" // TF:llvm-project +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/Identifier.h" // TF:llvm-project +#include "mlir/IR/StandardTypes.h" // TF:llvm-project +#include "mlir/IR/Types.h" // TF:llvm-project #include "tensorflow/compiler/mlir/xla/hlo_utils.h" #include "tensorflow/compiler/mlir/xla/ir/lhlo_ops.h" #include "tensorflow/compiler/xla/service/gpu/thunk.h" @@ -59,15 +59,24 @@ namespace lhlo = ::mlir::xla_lhlo; // TODO(b/137624192) Use tablegen for this. 
Status InsertMlirOp(HloOpcode opcode, OpBuilder func_builder, Location loc, - ArrayRef rets, ArrayRef args, + ArrayRef rets, ArrayRef args, ArrayRef> attrs) { switch (opcode) { + case HloOpcode::kAbs: + func_builder.create(loc, rets, args, attrs); + break; case HloOpcode::kAdd: func_builder.create(loc, rets, args, attrs); break; case HloOpcode::kAnd: func_builder.create(loc, rets, args, attrs); break; + case HloOpcode::kCeil: + func_builder.create(loc, rets, args, attrs); + break; + case HloOpcode::kCos: + func_builder.create(loc, rets, args, attrs); + break; case HloOpcode::kDivide: func_builder.create(loc, rets, args, attrs); break; @@ -83,12 +92,24 @@ Status InsertMlirOp(HloOpcode opcode, OpBuilder func_builder, Location loc, case HloOpcode::kMultiply: func_builder.create(loc, rets, args, attrs); break; + case HloOpcode::kNegate: + func_builder.create(loc, rets, args, attrs); + break; + case HloOpcode::kRemainder: + func_builder.create(loc, rets, args, attrs); + break; case HloOpcode::kSelect: func_builder.create(loc, rets, args, attrs); break; + case HloOpcode::kSign: + func_builder.create(loc, rets, args, attrs); + break; case HloOpcode::kSubtract: func_builder.create(loc, rets, args, attrs); break; + case HloOpcode::kTanh: + func_builder.create(loc, rets, args, attrs); + break; default: return tensorflow::errors::Internal(absl::StrCat( "LHLO opcode ", HloOpcodeString(opcode), " is not supported.")); @@ -168,8 +189,8 @@ StatusOr LhloDialectEmitter::CreateFunction( Status LhloDialectEmitter::DefaultAction(HloInstruction* instr) { TF_ASSIGN_OR_RETURN(auto function, CreateFunction(*instr)); OpBuilder func_builder(function.getBody()); - llvm::SmallVector arg_values{function.args_begin(), - function.args_end()}; + llvm::SmallVector arg_values{function.args_begin(), + function.args_end()}; TF_RETURN_IF_ERROR(InsertMlirOp(instr->opcode(), func_builder, getLocation(instr), ArrayRef{}, arg_values, llvm::None)); @@ -197,7 +218,7 @@ Status LhloDialectEmitter::HandleFusion(HloInstruction* fusion) { // Load the HLO argument tensors from the corresponding buffers. The last // argument is for the result, so no need to load it. OpBuilder body_builder(fusion_op.region()); - llvm::SmallVector arg_values; + llvm::SmallVector arg_values; for (int i = 0, e = function.getNumArguments() - 1; i < e; ++i) { arg_values.push_back(body_builder.create<::mlir::TensorLoadOp>( getLocation(fusion), function.getArgument(i))); @@ -211,7 +232,7 @@ Status LhloDialectEmitter::HandleFusion(HloInstruction* fusion) { // Insert the write-back from the HLO computation to the result argument // buffer. 
body_builder.setInsertionPoint(fusion_op.region().back().getTerminator()); - Value* result_memref = function.getArgument(function.getNumArguments() - 1); + Value result_memref = function.getArgument(function.getNumArguments() - 1); body_builder.create<::mlir::TensorStoreOp>(getLocation(fusion), result, result_memref); @@ -220,8 +241,8 @@ Status LhloDialectEmitter::HandleFusion(HloInstruction* fusion) { Status LhloDialectEmitter::HandleReduce(HloInstruction* reduce) { TF_ASSIGN_OR_RETURN(auto function, CreateFunction(*reduce)); - llvm::SmallVector arg_values{function.args_begin(), - function.args_end()}; + llvm::SmallVector arg_values{function.args_begin(), + function.args_end()}; OpBuilder builder(function.getBody()); auto loc = getLocation(reduce); int input_count = reduce->operand_count() / 3; @@ -239,7 +260,7 @@ Status LhloDialectEmitter::HandleReduce(HloInstruction* reduce) { OpBuilder body_builder(reduce_op.body()); auto block = body_builder.getInsertionBlock(); auto to_apply = reduce->to_apply(); - llvm::SmallVector reduce_arg_values; + llvm::SmallVector reduce_arg_values; // First map parameters to memrefs on the operation. for (auto param : to_apply->parameter_instructions()) { TF_ASSIGN_OR_RETURN(auto arg_type, ConvertShapeToType( @@ -280,8 +301,8 @@ Status LhloDialectEmitter::HandleCompare(HloInstruction* compare) { TF_ASSIGN_OR_RETURN(auto function, CreateFunction(*compare)); OpBuilder func_builder(function.getBody()); - llvm::SmallVector arg_values{function.args_begin(), - function.args_end()}; + llvm::SmallVector arg_values{function.args_begin(), + function.args_end()}; func_builder.create(getLocation(compare), llvm::None, arg_values, comparison_direction_attr); return Status::OK(); diff --git a/tensorflow/compiler/xla/service/mlir_gpu/lhlo_dialect_emitter.h b/tensorflow/compiler/xla/service/mlir_gpu/lhlo_dialect_emitter.h index 09d6fc3a5bb..48d275ef5e0 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/lhlo_dialect_emitter.h +++ b/tensorflow/compiler/xla/service/mlir_gpu/lhlo_dialect_emitter.h @@ -19,10 +19,10 @@ limitations under the License. #include #include "absl/container/flat_hash_map.h" -#include "mlir/IR/Builders.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project #include "tensorflow/compiler/xla/service/buffer_assignment.h" #include "tensorflow/compiler/xla/service/gpu/thunk.h" #include "tensorflow/compiler/xla/service/gpu/thunk_emitter.h" diff --git a/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler.cc b/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler.cc index d332392ab2f..67ef9506fe2 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler.cc +++ b/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler.cc @@ -18,17 +18,17 @@ limitations under the License. 
#include #include "absl/container/flat_hash_map.h" -#include "mlir/Dialect/GPU/GPUDialect.h" // TF:local_config_mlir -#include "mlir/Dialect/LLVMIR/LLVMDialect.h" // TF:local_config_mlir -#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir -#include "mlir/IR/Attributes.h" // TF:local_config_mlir -#include "mlir/IR/Function.h" // TF:local_config_mlir -#include "mlir/IR/Location.h" // TF:local_config_mlir -#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir -#include "mlir/IR/Module.h" // TF:local_config_mlir -#include "mlir/IR/Value.h" // TF:local_config_mlir -#include "mlir/Support/LLVM.h" // TF:local_config_mlir -#include "mlir/Target/NVVMIR.h" // TF:local_config_mlir +#include "mlir/Dialect/GPU/GPUDialect.h" // TF:llvm-project +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" // TF:llvm-project +#include "mlir/Dialect/StandardOps/Ops.h" // TF:llvm-project +#include "mlir/IR/Attributes.h" // TF:llvm-project +#include "mlir/IR/Function.h" // TF:llvm-project +#include "mlir/IR/Location.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Module.h" // TF:llvm-project +#include "mlir/IR/Value.h" // TF:llvm-project +#include "mlir/Support/LLVM.h" // TF:llvm-project +#include "mlir/Target/NVVMIR.h" // TF:llvm-project #include "tensorflow/compiler/xla/service/buffer_assignment.h" #include "tensorflow/compiler/xla/service/dump.h" #include "tensorflow/compiler/xla/service/gpu/gpu_constants.h" @@ -197,23 +197,23 @@ static absl::optional getLaunchBound(const mlir::gpu::KernelDim3& dim) { op->emitError() << "bound " << name << " is not constant"; return absl::nullopt; }; - auto y_op = dim.y->getDefiningOp(); + auto y_op = dim.y.getDefiningOp(); auto dim_y = get_constant(y_op, "y"); if (!dim_y.has_value() || dim_y.value() != 1) { y_op->emitError() << "bound 'y' is not constant 1"; return absl::nullopt; } - auto z_op = dim.z->getDefiningOp(); + auto z_op = dim.z.getDefiningOp(); auto dim_z = get_constant(z_op, "z"); if (!dim_z.has_value() || dim_z.value() != 1) { z_op->emitError() << "bound 'z' is not constant 1"; return absl::nullopt; } - return get_constant(dim.x->getDefiningOp(), "x"); + return get_constant(dim.x.getDefiningOp(), "x"); } using OperandToValueMap = - absl::flat_hash_map>; + absl::flat_hash_map>; static StatusOr> ComputeOperandToValueMap( OperandToValueMap* operand_to_value_map, const HloInstruction* instr, @@ -224,7 +224,7 @@ static StatusOr> ComputeOperandToValueMap( for (int kernel_index = 0; kernel_index < launchOp.getNumKernelOperands(); ++kernel_index) { auto launchop_operand = - dyn_cast(launchOp.getKernelOperand(kernel_index)); + launchOp.getKernelOperand(kernel_index).dyn_cast(); if (!launchop_operand) { launchOp.emitError("argument to kernel is not a function input"); has_failed = true; @@ -233,7 +233,7 @@ static StatusOr> ComputeOperandToValueMap( // host_index is the argument position to the surrounding function that // contains the launch. This index corresponds to HLO operand indices // by construction. - auto host_index = launchop_operand->getArgNumber(); + auto host_index = launchop_operand.getArgNumber(); // The trailing argument to the outer function are the results. auto operand = (host_index < operands.size()) ? 
        operands[host_index] : instr;
@@ -272,7 +272,7 @@ Status InsertBufferLoadPreduleIntoKernel(
   std::vector<Type> as_mlir_types(new_arg_types.begin(), new_arg_types.end());
   auto new_args = kernel.front().addArguments(as_mlir_types);
-  std::vector<Value*> buffer_args(new_args.begin(), new_args.end());
+  std::vector<Value> buffer_args(new_args.begin(), new_args.end());
   auto zero = builder.create<LLVM::ConstantOp>(
       loc, offset_type, builder.getI64IntegerAttr(0));
@@ -304,29 +304,27 @@ Status InsertBufferLoadPreduleIntoKernel(
       // { baseptr, dataptr, offset, shape_vect, stride_vect }
       // where shape_vect and stride_vect are integer vectors with length
       // matching the rank of the tensor.
-      auto target_type = value->getType().cast<LLVM::LLVMType>();
+      auto target_type = value.getType().cast<LLVM::LLVMType>();
       auto struct_type = target_type.getPointerElementTy();
       auto descPtr = builder.create<LLVM::AllocaOp>(loc, target_type, one, 0);
       // Fill the base and aligned pointers.
       auto casted = builder.create<LLVM::BitcastOp>(
-          loc, struct_type.getStructElementType(0),
-          llvm::ArrayRef<Value*>{ptr});
+          loc, struct_type.getStructElementType(0), llvm::ArrayRef<Value>{ptr});
       auto structPtrAddr = builder.create<LLVM::GEPOp>(
           loc, struct_type.getStructElementType(0), descPtr,
-          llvm::ArrayRef<Value*>{zero, baseIndex});
+          llvm::ArrayRef<Value>{zero, baseIndex});
       builder.create<LLVM::StoreOp>(loc, casted, structPtrAddr);
       casted = builder.create<LLVM::BitcastOp>(
-          loc, struct_type.getStructElementType(1),
-          llvm::ArrayRef<Value*>{ptr});
+          loc, struct_type.getStructElementType(1), llvm::ArrayRef<Value>{ptr});
       structPtrAddr = builder.create<LLVM::GEPOp>(
           loc, struct_type.getStructElementType(1), descPtr,
-          llvm::ArrayRef<Value*>{zero, dataIndex});
+          llvm::ArrayRef<Value>{zero, dataIndex});
       builder.create<LLVM::StoreOp>(loc, casted, structPtrAddr);
       // Fill the offset value.
       auto structOffsetAddr = builder.create<LLVM::GEPOp>(
           loc, struct_type.getStructElementType(1), descPtr,
-          llvm::ArrayRef<Value*>{zero, offsetIndex});
+          llvm::ArrayRef<Value>{zero, offsetIndex});
       builder.create<LLVM::StoreOp>(loc, offset, structOffsetAddr);
       // Fill the shape.
       auto shape = operand->shape();
@@ -341,7 +339,7 @@ Status InsertBufferLoadPreduleIntoKernel(
             loc, offset_type, builder.getI64IntegerAttr(extent.index()));
         auto shapeEntryPtr = builder.create<LLVM::GEPOp>(
             loc, entry_type, descPtr,
-            llvm::ArrayRef<Value*>{zero, shapeIndex, index});
+            llvm::ArrayRef<Value>{zero, shapeIndex, index});
         auto extentValue = builder.create<LLVM::ConstantOp>(
             loc, entry_type, builder.getI64IntegerAttr(extent.value()));
         builder.create<LLVM::StoreOp>(loc, extentValue, shapeEntryPtr);
@@ -349,13 +347,13 @@ Status InsertBufferLoadPreduleIntoKernel(
       // Finally, fill the strides.
       // TODO(b/137624192): Take assigned layout into account.
       entry_type = struct_type.getStructElementType(4).getArrayElementType();
-      Value* accumulator = nullptr;
+      Value accumulator = nullptr;
       for (int64 idx = shape.rank() - 1; idx >= 0; --idx) {
         auto indexValue = builder.create<LLVM::ConstantOp>(
             loc, offset_type, builder.getI64IntegerAttr(idx));
         auto strideEntryPtr = builder.create<LLVM::GEPOp>(
             loc, entry_type, descPtr,
-            llvm::ArrayRef<Value*>{zero, strideIndex, indexValue});
+            llvm::ArrayRef<Value>{zero, strideIndex, indexValue});
         if (accumulator) {
           auto strideValue = builder.create(
               loc, entry_type,
@@ -369,7 +367,7 @@
       }
     }
     // Now we can use the descriptor instead of the original argument.
-    value->replaceAllUsesWith(descPtr);
+    value.replaceAllUsesWith(descPtr);
   }
 }
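The preamble above fills the standard memref-to-LLVM descriptor. A hedged C view of that struct for a rank-2 f32[2,3] buffer with the default descending layout (the struct name is illustrative; the field values follow from the shape and stride loops above):

#include <cstdint>

struct MemRefDescriptorRank2 {
  float* base;         // struct element 0: base pointer
  float* aligned;      // struct element 1: aligned data pointer
  int64_t offset;      // struct element 2: element offset, 0 for a fresh buffer
  int64_t shape[2];    // struct element 3: {2, 3}
  int64_t strides[2];  // struct element 4: {3, 1}
};

The stride loop walks dimensions minor-to-major with a running accumulator, so stride[1] = 1 and stride[0] = 1 * extent(1) = 3.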
diff --git a/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler.h b/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler.h
index d84b72cadcf..bb852b47f22 100644
--- a/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler.h
+++ b/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler.h
@@ -17,8 +17,8 @@ limitations under the License.
 #define TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_MLIR_COMPILER_H_
 
 #include "absl/container/flat_hash_map.h"
-#include "mlir/IR/MLIRContext.h"  // TF:local_config_mlir
-#include "mlir/IR/Module.h"  // TF:local_config_mlir
+#include "mlir/IR/MLIRContext.h"  // TF:llvm-project
+#include "mlir/IR/Module.h"  // TF:llvm-project
 #include "tensorflow/compiler/xla/service/compiler.h"
 #include "tensorflow/compiler/xla/service/mlir_gpu/emission_context.h"
diff --git a/tensorflow/compiler/xla/service/mlir_gpu/mlir_irgen_test_base.cc b/tensorflow/compiler/xla/service/mlir_gpu/mlir_irgen_test_base.cc
index da42e6462e2..dbc6efe9ec9 100644
--- a/tensorflow/compiler/xla/service/mlir_gpu/mlir_irgen_test_base.cc
+++ b/tensorflow/compiler/xla/service/mlir_gpu/mlir_irgen_test_base.cc
@@ -22,8 +22,8 @@ limitations under the License.
 
 #include "absl/memory/memory.h"
 #include "llvm/Support/raw_ostream.h"
-#include "mlir/IR/Module.h"  // TF:local_config_mlir
-#include "mlir/Pass/PassManager.h"  // TF:local_config_mlir
+#include "mlir/IR/Module.h"  // TF:llvm-project
+#include "mlir/Pass/PassManager.h"  // TF:llvm-project
 #include "tensorflow/compiler/xla/service/hlo_module_config.h"
 #include "tensorflow/compiler/xla/service/mlir_gpu/failover_compiler.h"
 #include "tensorflow/compiler/xla/service/mlir_gpu/inject_errors_pass.h"
diff --git a/tensorflow/compiler/xla/service/mlir_gpu/tests/mlir_gpu_lhlo_gen_test.cc b/tensorflow/compiler/xla/service/mlir_gpu/tests/mlir_gpu_lhlo_gen_test.cc
index 505d16d11cc..afcac65bdc7 100644
--- a/tensorflow/compiler/xla/service/mlir_gpu/tests/mlir_gpu_lhlo_gen_test.cc
+++ b/tensorflow/compiler/xla/service/mlir_gpu/tests/mlir_gpu_lhlo_gen_test.cc
@@ -393,5 +393,104 @@ ENTRY %AddReduce (x: f32[100,10], c: f32[]) -> f32[100] {
 )");
 }
 
+TEST_F(LhloGenTest, Abs) {
+  CompileAndVerifyIr(R"(
+HloModule Abs
+ENTRY %Abs (val: f32[2,2]) -> f32[2,2] {
+  %val = f32[2,2]{1,0} parameter(0)
+  ROOT %abs = f32[2,2]{1,0} abs(f32[2,2]{1,0} %val)
+})",
+                     R"(
+;CHECK: func @abs(%[[ARG0:.*]]: [[TYPE:.*]], %[[ARG1:.*]]: [[TYPE]]) {
+;CHECK: "xla_lhlo.abs"(%[[ARG0]], %[[ARG1]]) : ([[TYPE]], [[TYPE]]) -> ()
+;CHECK: }
+ )");
+}
+
+TEST_F(LhloGenTest, Ceil) {
+  CompileAndVerifyIr(R"(
+HloModule Ceil
+ENTRY %Ceil (val: f32[2,2]) -> f32[2,2] {
+  %val = f32[2,2]{1,0} parameter(0)
+  ROOT %ceil = f32[2,2]{1,0} ceil(f32[2,2]{1,0} %val)
+})",
+                     R"(
+;CHECK: func @ceil(%[[ARG0:.*]]: [[TYPE:.*]], %[[ARG1:.*]]: [[TYPE]]) {
+;CHECK: "xla_lhlo.ceil"(%[[ARG0]], %[[ARG1]]) : ([[TYPE]], [[TYPE]]) -> ()
+;CHECK: }
+ )");
+}
+
+TEST_F(LhloGenTest, Cos) {
+  CompileAndVerifyIr(R"(
+HloModule Cos
+ENTRY %Cos (val: f32[2,2]) -> f32[2,2] {
+  %val = f32[2,2]{1,0} parameter(0)
+  ROOT %cos = f32[2,2]{1,0} cosine(f32[2,2]{1,0} %val)
+})",
+                     R"(
+;CHECK: func @cosine(%[[ARG0:.*]]: [[TYPE:.*]], %[[ARG1:.*]]: [[TYPE]]) {
+;CHECK: "xla_lhlo.cos"(%[[ARG0]], %[[ARG1]]) : ([[TYPE]], [[TYPE]]) -> ()
+;CHECK: }
+ )");
+}
+
+TEST_F(LhloGenTest, Neg) {
+  CompileAndVerifyIr(R"(
+HloModule Neg
+ENTRY %Neg (val: f32[2,2]) -> f32[2,2] {
+  %val = f32[2,2]{1,0} parameter(0)
+  ROOT %neg = f32[2,2]{1,0} negate(f32[2,2]{1,0} %val)
+})",
+                     R"(
+;CHECK: func @negate(%[[ARG0:.*]]: [[TYPE:.*]], %[[ARG1:.*]]: [[TYPE]]) {
+;CHECK: "xla_lhlo.neg"(%[[ARG0]], %[[ARG1]]) : ([[TYPE]], [[TYPE]]) -> ()
+;CHECK: }
+ )");
+}
+
+TEST_F(LhloGenTest, Rem) {
+  CompileAndVerifyIr(R"(
+HloModule Rem
+ENTRY %Rem(x: f32[2,2], y: f32[2,2]) -> f32[2,2] {
+  %x = f32[2,2]{1,0} parameter(0)
+  %y = f32[2,2]{1,0} parameter(1)
+  ROOT %rem = f32[2,2]{1,0} remainder(f32[2,2]{1,0} %x, f32[2,2]{1,0} %y)
+})",
+                     R"(
+;CHECK: func @remainder(%[[ARG0:.*]]: [[TYPE:.*]], %[[ARG1:.*]]: [[TYPE]], %[[ARG2:.*]]: [[TYPE]]) {
+;CHECK: "xla_lhlo.remainder"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : ([[TYPE]], [[TYPE]], [[TYPE]]) -> ()
+;CHECK: }
+ )");
+}
+
+TEST_F(LhloGenTest, Sign) {
+  CompileAndVerifyIr(R"(
+HloModule Sign
+ENTRY %Sign (val: f32[2,2]) -> f32[2,2] {
+  %val = f32[2,2]{1,0} parameter(0)
+  ROOT %sign = f32[2,2]{1,0} sign(f32[2,2]{1,0} %val)
+})",
+                     R"(
+;CHECK: func @sign(%[[ARG0:.*]]: [[TYPE:.*]], %[[ARG1:.*]]: [[TYPE]]) {
+;CHECK: "xla_lhlo.sign"(%[[ARG0]], %[[ARG1]]) : ([[TYPE]], [[TYPE]]) -> ()
+;CHECK: }
+ )");
+}
+
+TEST_F(LhloGenTest, Tanh) {
+  CompileAndVerifyIr(R"(
+HloModule Tanh
+ENTRY %Tanh (val: f32[2,2]) -> f32[2,2] {
+  %val = f32[2,2]{1,0} parameter(0)
+  ROOT %tanh = f32[2,2]{1,0} tanh(f32[2,2]{1,0} %val)
+})",
+                     R"(
+;CHECK: func @tanh(%[[ARG0:.*]]: [[TYPE:.*]], %[[ARG1:.*]]: [[TYPE]]) {
+;CHECK: "xla_lhlo.tanh"(%[[ARG0]], %[[ARG1]]) : ([[TYPE]], [[TYPE]]) -> ()
+;CHECK: }
+ )");
+}
+
 }  // namespace mlir_gpu
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/multi_output_fusion.cc b/tensorflow/compiler/xla/service/multi_output_fusion.cc
index 41e2b0e9cb1..16e34331ac5 100644
--- a/tensorflow/compiler/xla/service/multi_output_fusion.cc
+++ b/tensorflow/compiler/xla/service/multi_output_fusion.cc
@@ -151,6 +151,37 @@ HloInstruction* MultiOutputFusion::Fuse(HloInstruction* instr1,
   return remaining;
 }
 
+HloInstruction* MultiOutputFusion::CreateFusion(HloInstruction* base,
+                                                HloInstruction* to_fuse) {
+  HloInstruction* input_fusion =
+      computation()->AddInstruction(HloInstruction::CreateFusion(
+          base->shape(), HloInstruction::FusionKind::kLoop, base));
+
+  // Update candidate_ and all_fusion_candidates_.
+  std::vector<std::pair<HloInstruction*, int64>> new_fusibles =
+      GetNewFusibles(base, to_fuse);
+  int64 index;
+  if (candidates_index_.contains(input_fusion)) {
+    index = candidates_index_[input_fusion];
+  } else {
+    index = candidates_.size();
+    InsertOrDie(&candidates_index_, input_fusion, index);
+    candidates_.emplace_back(input_fusion);
+    all_fusion_candidates_.push_back(input_fusion);
+  }
+
+  // Update the worklist_.
+  FusionCandidate& candidate_node = candidates_[index];
+  for (auto it : new_fusibles) {
+    candidate_node.fusibles.emplace_back(it.first, it.second);
+    worklist_.emplace(input_fusion, it.first, it.second);
+  }
+
+  reachability_->Replace(base, input_fusion);
+  TF_CHECK_OK(computation()->ReplaceInstruction(base, input_fusion));
+  return input_fusion;
+}
+
 bool MultiOutputFusion::IsProfitableOperand(HloInstruction* instr) {
   // kConstant instruction will not have memory reads, so it won't be a profit
   // source. Skip them.
@@ -167,29 +198,12 @@ bool MultiOutputFusion::IsProfitableOperand(HloInstruction* instr) {
   return true;
 }
 
-void MultiOutputFusion::Update(HloInstruction* instr1, HloInstruction* instr2) {
-  HloInstruction* fusion = instr1;
-  HloInstruction* fused = instr2;
-  if (is_fused(instr1)) {
-    fusion = instr2;
-    fused = instr1;
-  }
-
-  // Insert the newly created instruction (if any), to candidates_.
-  for (auto use : fusion->users()) {
-    if (candidates_index_.find(use) == candidates_index_.end()) {
-      int64 index = candidates_.size();
-      candidates_.emplace_back(use);
-      InsertOrDie(&candidates_index_, use, index++);
-    }
-  }
+std::vector<std::pair<HloInstruction*, int64>>
+MultiOutputFusion::GetNewFusibles(HloInstruction* fusion,
+                                  HloInstruction* fused) {
   FusionCandidate& fusion_node = candidates_[get_candidate_id(fusion)];
   FusionCandidate& fused_node = candidates_[get_candidate_id(fused)];
 
-  // Update the reachability graph.
-  UpdateReachability(fusion, fused, all_fusion_candidates_,
-                     [this](HloInstruction* instr) { return is_fused(instr); });
-
   // Update the fusible list for fusion. Variable new_fusibles keeps
   // track of the new or changed entries.
   std::vector<std::pair<HloInstruction*, int64>> new_fusibles;
@@ -227,6 +241,33 @@ void MultiOutputFusion::Update(HloInstruction* instr1, HloInstruction* instr2) {
   }
   fused_node.fusibles.clear();
 
+  return new_fusibles;
+}
+
+void MultiOutputFusion::Update(HloInstruction* instr1, HloInstruction* instr2) {
+  HloInstruction* fusion = instr1;
+  HloInstruction* fused = instr2;
+  if (is_fused(instr1)) {
+    fusion = instr2;
+    fused = instr1;
+  }
+
+  // Insert the newly created instruction (if any), to candidates_.
+  for (auto use : fusion->users()) {
+    if (candidates_index_.find(use) == candidates_index_.end()) {
+      int64 index = candidates_.size();
+      candidates_.emplace_back(use);
+      InsertOrDie(&candidates_index_, use, index++);
+    }
+  }
+
+  // Update the reachability graph.
+  UpdateReachability(fusion, fused, all_fusion_candidates_,
+                     [this](HloInstruction* instr) { return is_fused(instr); });
+
+  std::vector<std::pair<HloInstruction*, int64>> new_fusibles =
+      GetNewFusibles(fusion, fused);
+
   // Update the worklist_.
   for (auto it : new_fusibles) {
     worklist_.emplace(fusion, it.first, it.second);
@@ -235,10 +276,15 @@ void MultiOutputFusion::Update(HloInstruction* instr1, HloInstruction* instr2) {
 
 bool MultiOutputFusion::LegalToFuse(HloInstruction* instr1,
                                     HloInstruction* instr2) {
-  if (instr1 == instr2) {
+  if (instr1->opcode() != HloOpcode::kFusion) {
     return false;
   }
-  if (instr1->opcode() != HloOpcode::kFusion) {
+  return LegalToFuseMainConstraints(instr1, instr2);
+}
+
+bool MultiOutputFusion::LegalToFuseMainConstraints(HloInstruction* instr1,
+                                                   HloInstruction* instr2) {
+  if (instr1 == instr2) {
     return false;
   }
 
@@ -342,7 +388,12 @@ bool MultiOutputFusion::Perform() {
       }
       Update(instr1, instr2);
      HloInstruction* ret = Fuse(instr1, instr2);
-      set_is_fused(ret == instr1 ? instr2 : instr1);
+      if (ret != instr1) {
+        set_is_fused(instr1);
+      }
+      if (ret != instr2) {
+        set_is_fused(instr2);
+      }
       changed = true;
       VLOG(2) << "After fusion, \t this: " << ret->name() << "\n"
               << ret->fused_instructions_computation()->ToString(
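The Update/GetNewFusibles split above preserves one invariant: each (instruction, profit) pair returned by GetNewFusibles becomes a worklist entry, and Perform() always pops the most profitable pair first. A self-contained toy model of that ordering (not TF code; names and profit values are made up):

#include <cstdint>
#include <queue>

// Toy stand-in for the pass's profit-ordered worklist: higher profit wins.
struct ToyCandidate {
  int id;          // stands in for an HloInstruction*
  int64_t profit;  // e.g. bytes of duplicated reads saved by fusing
  bool operator<(const ToyCandidate& other) const {
    return profit < other.profit;
  }
};

int main() {
  std::priority_queue<ToyCandidate> worklist;
  worklist.push({1, 128});   // pairs as GetNewFusibles would report them
  worklist.push({2, 64});
  return worklist.top().id;  // 1: the 128-profit candidate is fused first
}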
diff --git a/tensorflow/compiler/xla/service/multi_output_fusion.h b/tensorflow/compiler/xla/service/multi_output_fusion.h
index 9be69f808c4..55cb15e94fc 100644
--- a/tensorflow/compiler/xla/service/multi_output_fusion.h
+++ b/tensorflow/compiler/xla/service/multi_output_fusion.h
@@ -79,6 +79,11 @@ class MultiOutputFusion : public HloModulePass {
   // Test if it's legal to fuse instr1 and instr2 into one fusion instruction.
   virtual bool LegalToFuse(HloInstruction* instr1, HloInstruction* instr2);
 
+  // Test if it's legal to fuse instr1 and instr2 into one fusion instruction
+  // using main constraints.
+  bool LegalToFuseMainConstraints(HloInstruction* instr1,
+                                  HloInstruction* instr2);
+
   // Fuse HloInstruction instr1 and instr2 and return the fused instruction.
   // The other instruction is removed from its parent computation.
   virtual HloInstruction* Fuse(HloInstruction* instr1, HloInstruction* instr2);
@@ -105,6 +110,17 @@ class MultiOutputFusion : public HloModulePass {
   // InstructionFusion instead.
   virtual bool DoProducerConsumerMultiOutputFusion();
 
+  // Returns a list of new fusible instructions that can be fused into the
+  // fusion of `fusion' and `fused'. The second entry in each pair is the
+  // profit from fusing the corresponding instruction.
+  std::vector<std::pair<HloInstruction*, int64>> GetNewFusibles(
+      HloInstruction* fusion, HloInstruction* fused);
+
+  // Create a new fusion instruction and add `base' into it.
+  // Prepare for fusing `to_fuse' into the created fusion by updating
+  // reachability, worklist, and fusion candidates.
+  HloInstruction* CreateFusion(HloInstruction* base, HloInstruction* to_fuse);
+
  private:
   // An internal data structure for each instruction in current computation.
   // When an instruction is removed, member 'hlo' is set to nullptr.
diff --git a/tensorflow/compiler/xla/service/pattern_matcher.h b/tensorflow/compiler/xla/service/pattern_matcher.h
index 32e4c636327..3a5f6da3b7c 100644
--- a/tensorflow/compiler/xla/service/pattern_matcher.h
+++ b/tensorflow/compiler/xla/service/pattern_matcher.h
@@ -73,7 +73,7 @@ namespace xla {
 //   - EqualTo
 //   - CompatibleTo
 //   - IsScalar/IsEffectiveScalar/IsArray/IsTuple
-//   - IsDenseArray/IsSparseArray
+//   - IsDenseArray
 //   - WithLayout: layout shape's layout matches the given pattern (e.g.
 //     Layout().WithDenseFormat())
 //   - WithLayoutEqualTo: shape's layout equals the argument (i.e. another
@@ -87,7 +87,7 @@ namespace xla {
 //
 // Layout():
 //   - EqualTo
-//   - WithDenseFormat/WithSparseFormat
+//   - WithDenseFormat
 //
 // Op(), Shape(), and Layout() may be passed an argument of type
 // HloInstruction**, Shape**, or Layout**, respectively, or const versions of
@@ -506,12 +506,6 @@ class LayoutPattern {
     return AppendImpl(LayoutPatternFormatImpl(DENSE));
   }
 
-  // Modifies the pattern to match only if the layout has a sparse format.
-  constexpr auto WithSparseFormat() const
-      -> decltype(this->AppendImpl(LayoutPatternFormatImpl(SPARSE))) {
-    return AppendImpl(LayoutPatternFormatImpl(SPARSE));
-  }
-
  private:
   Impl impl_;
   LayoutType** matched_layout_;
@@ -1060,11 +1054,6 @@ class ShapePattern {
     return WithLayout(Layout().WithDenseFormat());
   }
 
-  constexpr auto IsSparseArray() const
-      -> decltype(this->WithLayout(Layout().WithSparseFormat())) {
-    return WithLayout(Layout().WithSparseFormat());
-  }
-
   // Modifies the pattern to match only if the shape has a subshape that matches
   // the given pattern.
template diff --git a/tensorflow/compiler/xla/service/pattern_matcher_gmock_test.cc b/tensorflow/compiler/xla/service/pattern_matcher_gmock_test.cc index f51a18b1389..a2ba8b888dc 100644 --- a/tensorflow/compiler/xla/service/pattern_matcher_gmock_test.cc +++ b/tensorflow/compiler/xla/service/pattern_matcher_gmock_test.cc @@ -56,9 +56,6 @@ TEST(PatternMatcherGmock, MatchShape) { TEST(PatternMatcherGmock, MatchLayout) { Layout l = LayoutUtil::MakeLayout({0, 1}); EXPECT_THAT(l, GmockMatch(m::Layout())); - EXPECT_THAT(&l, Not(GmockMatch(m::Layout().WithSparseFormat()))); - EXPECT_THAT(Describe(GmockMatch(m::Layout().WithSparseFormat())), - "a layout with format SPARSE"); } TEST(PatternMatchGmock, MatchInstruction) { diff --git a/tensorflow/compiler/xla/service/pattern_matcher_test.cc b/tensorflow/compiler/xla/service/pattern_matcher_test.cc index b923117318a..5e1287e5ddc 100644 --- a/tensorflow/compiler/xla/service/pattern_matcher_test.cc +++ b/tensorflow/compiler/xla/service/pattern_matcher_test.cc @@ -89,7 +89,6 @@ TEST_F(PatternMatcherTest, DenseArrayShape) { EXPECT_TRUE(Match(&array_shape, match::Shape(&matched_shape).IsArray())); EXPECT_EQ(matched_shape, &array_shape); EXPECT_TRUE(Match(&array_shape, match::Shape().IsDenseArray())); - EXPECT_FALSE(Match(&array_shape, match::Shape().IsSparseArray())); EXPECT_FALSE(Match(&array_shape, match::Shape().IsScalar())); EXPECT_FALSE(Match(&array_shape, match::Shape().IsTuple())); EXPECT_TRUE(Match(&array_shape, match::Shape().WithElementType(F32))); @@ -97,38 +96,12 @@ TEST_F(PatternMatcherTest, DenseArrayShape) { EXPECT_FALSE( Match(&array_shape, match::Shape().WithSubshape({0}, match::Shape()))); Layout* matched_layout; - EXPECT_FALSE(Match(&array_shape, - match::Shape().WithLayout( - match::Layout(&matched_layout).WithSparseFormat()))); EXPECT_TRUE(Match(&array_shape, match::Shape().WithLayout( match::Layout(&matched_layout).WithDenseFormat()))); EXPECT_EQ(matched_layout, &array_shape.layout()); } -TEST_F(PatternMatcherTest, SparseArrayShape) { - auto array_shape = ShapeUtil::MakeShapeWithSparseLayout(F32, {2, 3, 4}, 10); - Shape* matched_shape; - EXPECT_TRUE(Match(&array_shape, match::Shape(&matched_shape).IsArray())); - EXPECT_EQ(matched_shape, &array_shape); - EXPECT_FALSE(Match(&array_shape, match::Shape().IsDenseArray())); - EXPECT_TRUE(Match(&array_shape, match::Shape().IsSparseArray())); - EXPECT_FALSE(Match(&array_shape, match::Shape().IsScalar())); - EXPECT_FALSE(Match(&array_shape, match::Shape().IsTuple())); - EXPECT_TRUE(Match(&array_shape, match::Shape().WithElementType(F32))); - EXPECT_TRUE(Match(&array_shape, match::Shape().WithRank(3))); - EXPECT_FALSE( - Match(&array_shape, match::Shape().WithSubshape({0}, match::Shape()))); - Layout* matched_layout; - EXPECT_FALSE(Match(&array_shape, - match::Shape().WithLayout( - match::Layout(&matched_layout).WithDenseFormat()))); - EXPECT_TRUE(Match(&array_shape, - match::Shape().WithLayout( - match::Layout(&matched_layout).WithSparseFormat()))); - EXPECT_EQ(matched_layout, &array_shape.layout()); -} - TEST_F(PatternMatcherTest, TupleShape) { auto tuple_shape = ShapeUtil::MakeTupleShape({ ShapeUtil::MakeShape(F32, {1, 2, 3}), @@ -568,15 +541,6 @@ TEST_F(PatternMatcherTest, LayoutDescribeToAndExplain) { EXPECT_DESC_AND_EXPLANATION(layout2, m::Layout().EqualTo(&layout), "a layout equal to {1,2}", "Layout {2,2} is not equal to expected {1,2}"); - EXPECT_DESC_AND_EXPLANATION(layout2, m::Layout().WithSparseFormat(), - "a layout with format SPARSE", - "Layout has format DENSE but expected 
SPARSE"); - EXPECT_DESC_AND_EXPLANATION(layout, - m::Layout().EqualTo(&layout).WithSparseFormat(), - "a layout:\n" - " * equal to {1,2} AND\n" - " * with format SPARSE", - "Layout has format DENSE but expected SPARSE"); } TEST_F(PatternMatcherTest, CustomCallTargetMatcherDescribeAndExplain) { @@ -665,11 +629,6 @@ TEST_F(PatternMatcherTest, ShapeDescribeToAndExplain) { "a shape with\n a layout equal to {0,1}", "Layout {1,0} is not equal to expected {0,1}\n" "in f32[1,2]{1,0}"); - EXPECT_DESC_AND_EXPLANATION( - shape, m::Shape().WithLayout(m::Layout().WithSparseFormat()), - "a shape with\n a layout with format SPARSE", - "Layout has format DENSE but expected SPARSE\n" - "in f32[1,2]{0,1}"); EXPECT_DESC_AND_EXPLANATION(shape, m::Shape().WithSubshapeEqualTo({10}, &shape), "a shape with subshape at index {10} which is\n" diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index ec6a97e928a..816047fcf5d 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -408,7 +408,18 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, for (size_t i = 1; i < arg_shapes.size(); ++i) { new_dimensions[dimension] += arg_shapes[i]->dimensions(dimension); } - return ShapeUtil::MakeShape(element_type, new_dimensions); + + Shape result = ShapeUtil::MakeShape(element_type, new_dimensions); + + // Set dynamic dimensions if any input has dynamic dimension. + for (const Shape* shape : arg_shapes) { + for (int64 i = 0; i < shape->dimensions_size(); ++i) { + if (shape->is_dynamic_dimension(i)) { + result.set_dynamic_dimension(i, true); + } + } + } + return result; } /* static */ StatusOr ShapeInference::InferConvertShape( @@ -1720,7 +1731,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, const int64 kernel_output_features = rhs.dimensions(dnums.kernel_output_feature_dimension()); - if (batch_group_count > 1 && kernel_output_features != batch_group_count) { + if (batch_group_count > 1 && + kernel_output_features % batch_group_count != 0) { return InvalidArgument( "Expected output feature dimension size (value %d) to be equal to " "batch group count %d; got (%s, %s)\n" @@ -1759,7 +1771,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, dnums.DebugString()); } - if (input_batch % batch_group_count > 0) { + if (input_batch % batch_group_count != 0) { return InvalidArgument( "Expected input batch dimension (value %d) to be divisible by " "batch_group_count (value %d); " @@ -1793,6 +1805,13 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, std::vector dimensions(num_dims); dimensions[dnums.output_batch_dimension()] = input_batch / batch_group_count; dimensions[dnums.output_feature_dimension()] = kernel_output_features; + + if (batch_group_count > 1) { + dimensions[dnums.output_batch_dimension()] = + kernel_output_features / batch_group_count; + dimensions[dnums.output_feature_dimension()] = batch_group_count; + } + for (int i = 0; i < num_spatial_dims; ++i) { dimensions[dnums.output_spatial_dimensions(i)] = window_output_shape.dimensions(i); diff --git a/tensorflow/compiler/xla/service/shape_inference_test.cc b/tensorflow/compiler/xla/service/shape_inference_test.cc index b189e047254..41a54e81792 100644 --- a/tensorflow/compiler/xla/service/shape_inference_test.cc +++ b/tensorflow/compiler/xla/service/shape_inference_test.cc @@ -1592,6 +1592,20 @@ TEST_F(ShapeInferenceTest, WhileWithBadShapes) { 
diff --git a/tensorflow/compiler/xla/service/shape_inference_test.cc b/tensorflow/compiler/xla/service/shape_inference_test.cc
index b189e047254..41a54e81792 100644
--- a/tensorflow/compiler/xla/service/shape_inference_test.cc
+++ b/tensorflow/compiler/xla/service/shape_inference_test.cc
@@ -1592,6 +1592,20 @@ TEST_F(ShapeInferenceTest, WhileWithBadShapes) {
               HasSubstr("parameter of condition and body"));
 }
 
+// Tests for the concatenate instruction with dynamic shapes.
+TEST_F(ShapeInferenceTest, ConcatenateWithDynamicShapes) {
+  auto dynamic_shape_1 =
+      ShapeUtil::MakeShape(F32, {32, 160, 10}, {true, false, false});
+  auto dynamic_shape_2 =
+      ShapeUtil::MakeShape(F32, {32, 160, 10}, {false, true, false});
+  auto inferred_status = ShapeInference::InferConcatOpShape(
+      {&dynamic_shape_1, &dynamic_shape_2}, /*dimension=*/0);
+  ASSERT_IS_OK(inferred_status.status());
+  Shape inferred = inferred_status.ValueOrDie();
+  ASSERT_TRUE(ShapeUtil::Equal(
+      ShapeUtil::MakeShape(F32, {64, 160, 10}, {true, true, false}), inferred));
+}
+
 // Tests for the concatenate instruction with proper shapes.
 TEST_F(ShapeInferenceTest, ConcatenateWithCorrectShapes) {
   auto inferred_status_1 = ShapeInference::InferConcatOpShape(
diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc
index 484673b8b6b..146d03fa0c5 100644
--- a/tensorflow/compiler/xla/shape_util.cc
+++ b/tensorflow/compiler/xla/shape_util.cc
@@ -229,16 +229,6 @@ StatusOr<Shape> MakeShapeWithLayoutInternal(
   return MakeShapeWithLayout(element_type, dimensions, layout);
 }
 
-/* static */ Shape ShapeUtil::MakeShapeWithSparseLayout(
-    PrimitiveType element_type, absl::Span<const int64> dimensions,
-    int64 max_sparse_elements) {
-  CHECK(IsArrayPrimitiveType(element_type));
-  Shape shape = ShapeUtil::MakeShape(element_type, dimensions);
-  *shape.mutable_layout() = LayoutUtil::MakeSparseLayout(max_sparse_elements);
-  TF_DCHECK_OK(ShapeUtil::ValidateShape(shape));
-  return shape;
-}
-
 /* static */ Shape
 ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout(
     const Shape& shape) {
@@ -637,9 +627,6 @@ ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout(
     return ByteSizeOfTupleIndexTable(shape, pointer_size);
   } else if (shape.IsArray()) {
     int64 byte_size = ByteSizeOfElements(shape);
-    if (LayoutUtil::IsSparseArray(shape)) {
-      byte_size += ByteSizeOfSparseIndices(shape);
-    }
     return byte_size;
   } else if (shape.element_type() == TOKEN) {
     return 0;
@@ -664,23 +651,12 @@ ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout(
   CHECK(shape.IsArray());
   int64 allocated_element_count;
 
-  if (LayoutUtil::IsSparseArray(shape)) {
-    allocated_element_count = LayoutUtil::MaxSparseElements(shape.layout());
-  } else {
-    CHECK(LayoutUtil::IsDenseArray(shape)) << shape.ShortDebugString();
-    allocated_element_count = ElementsIn(shape);
-  }
+  CHECK(LayoutUtil::IsDenseArray(shape)) << shape.ShortDebugString();
+  allocated_element_count = ElementsIn(shape);
   return allocated_element_count *
          ByteSizeOfPrimitiveType(shape.element_type());
 }
 
-/* static */ int64 ShapeUtil::ByteSizeOfSparseIndices(const Shape& shape) {
-  TF_DCHECK_OK(ValidateShape(shape));
-  CHECK(LayoutUtil::IsSparseArray(shape));
-  return LayoutUtil::MaxSparseElements(shape.layout()) * shape.rank() *
-         sizeof(int64);
-}
-
 /* static */ Status ShapeUtil::ValidateShapeWithOptionalLayoutInternal(
     const Shape& shape) {
   if (shape.element_type() == PRIMITIVE_TYPE_INVALID ||
@@ -721,9 +697,6 @@ ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout(
     return Status::OK();
   }
 
-  if (LayoutUtil::IsSparseArray(shape) && shape.rank() == 0) {
-    return InvalidArgument("sparse arrays must have rank > 0");
-  }
   for (int64 i = 0; i < shape.rank(); ++i) {
     int64 dimension = shape.dimensions(i);
     if (dimension < 0) {
@@ -744,43 +717,7 @@
     return Status::OK();
   }
 
-  // We can only reason about some
aspects of array's shape if it has a valid - // layout, these aspects will be ignored otherwise. - bool shape_has_valid_layout = LayoutUtil::HasLayout(shape) && - LayoutUtil::ValidateLayoutInShape(shape).ok(); - int64 shape_size = [&]() { - if (shape_has_valid_layout && LayoutUtil::IsSparseArray(shape)) { - int64 max_sparse_elements = LayoutUtil::MaxSparseElements(shape.layout()); - if (max_sparse_elements < 0) { - return max_sparse_elements; - } - int64 sparse_elements_size = MultiplyWithoutOverflow( - max_sparse_elements, ByteSizeOfPrimitiveType(shape.element_type())); - if (sparse_elements_size < 0) { - return sparse_elements_size; - } - int64 sparse_indices_size = - MultiplyWithoutOverflow(max_sparse_elements, shape.rank()); - if (sparse_indices_size < 0) { - return sparse_indices_size; - } - sparse_indices_size = - MultiplyWithoutOverflow(sparse_indices_size, sizeof(int64)); - if (sparse_indices_size < 0) { - return sparse_indices_size; - } - // At this point, both sparse_indices_size and sparse_elements_size are - // non-negative, so we can easily check if adding them wraps. - if (static_cast(sparse_elements_size) + - static_cast(sparse_indices_size) > - INT64_MAX) { - return static_cast(-1); - } - } - - // This is intentionally unconditional: even if the shape is sparse, we want - // to verify the densified version has a reasonable size. int64 dense_shape_size = 1; if (shape.dimensions().empty()) { return dense_shape_size; diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index 769094b1f0b..7e05e17865d 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -192,10 +192,7 @@ class ShapeUtil { }; // Returns the number of elements are contained within the provided shape; - // e.g. for rank 0 (scalars) the result is always 1. Note that sparse shapes - // may not actually be able to store this number of elements. See - // LayoutUtil::MaxSparseElements(shape) to obtain the maximum number of - // elements that can be stored in a sparse shape. + // e.g. for rank 0 (scalars) the result is always 1. // Precondition: shape.IsArray() static int64 ElementsIn(const Shape& shape); @@ -228,20 +225,12 @@ class ShapeUtil { int64 pointer_size); // Returns the number of bytes required for the elements in an allocation of - // `shape`, which must be an array shape. The return value does not include - // the bytes needed to store sparse indices. Dense shapes use a separate + // `shape`, which must be an array shape. Shapes use a separate // memory location for each element, and so for these shapes, - // `ByteSizeOf(shape) == ByteSizeOfElements(shape)`. For dense shapes, this - // size also includes padding if present in the layout. For sparse shapes, - // `ByteSizeOf(shape) == ByteSizeOfElements(shape) + - // ByteSizeOfSparseindices(shape)`. + // `ByteSizeOf(shape) == ByteSizeOfElements(shape)`. This + // size also includes padding if present in the layout. static int64 ByteSizeOfElements(const Shape& shape); - // Returns the number of bytes required for the sparse indices in an - // allocation of shape. The shape must be an array shape. The return value - // does not include the bytes needed to store sparse indices. - static int64 ByteSizeOfSparseIndices(const Shape& shape); - // Returns a human-readable string that represents the given shape, with or // without layout. e.g. "f32[42x12] {0, 1}" or "f32[64]". 
static string HumanString(const Shape& shape); @@ -427,9 +416,6 @@ class ShapeUtil { int64 element_size_in_bits = 0, int64 memory_space = 0); - static Shape MakeShapeWithSparseLayout(PrimitiveType element_type, - absl::Span dimensions, - int64 max_sparse_elements); // Returns the same shape except with all dimensions set to be static. static Shape MakeShapeWithStaticDimensions(const Shape& shape); diff --git a/tensorflow/compiler/xla/sparse_index_array.cc b/tensorflow/compiler/xla/sparse_index_array.cc deleted file mode 100644 index 82091bdee65..00000000000 --- a/tensorflow/compiler/xla/sparse_index_array.cc +++ /dev/null @@ -1,109 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/compiler/xla/sparse_index_array.h" - -#include "tensorflow/compiler/xla/index_util.h" -#include "tensorflow/compiler/xla/layout_util.h" -#include "tensorflow/compiler/xla/shape_util.h" - -namespace xla { - -SparseIndexArray::SparseIndexArray() : rank_(0), max_indices_(0) {} - -SparseIndexArray::SparseIndexArray(int64 max_indices, int64 rank, - std::vector indices) - : indices_(std::move(indices)), rank_(rank), max_indices_(max_indices) { - CHECK_GT(rank_, 0); - CHECK_EQ(indices_.size() % rank_, 0) - << "indices_.size(): " << indices_.size() << ", rank_: " << rank_; - CHECK_LE(index_count(), max_indices_); -} - -SparseIndexArray::SparseIndexArray(int64 max_indices, int64 rank, - absl::Span indices) - : SparseIndexArray(max_indices, rank, - std::vector(indices.begin(), indices.end())) {} - -SparseIndexArray::SparseIndexArray(int64 max_indices, - const Array2D& indices) - : SparseIndexArray(max_indices, indices.n2(), - std::vector(indices.begin(), indices.end())) {} - -int64 SparseIndexArray::index_count() const { - CHECK_GT(rank_, 0); - CHECK_EQ(indices_.size() % rank_, 0); - return indices_.size() / rank_; -} - -absl::Span SparseIndexArray::At( - int64 sparse_element_number) const { - CHECK_GT(rank_, 0); - CHECK_GE(sparse_element_number, 0); - CHECK_LE(rank_ * sparse_element_number + rank_, indices_.size()); - return absl::Span( - indices_.data() + rank_ * sparse_element_number, rank_); -} - -absl::Span SparseIndexArray::At(int64 sparse_element_number) { - CHECK_GT(rank_, 0); - CHECK_GE(sparse_element_number, 0); - CHECK_LE(rank_ * sparse_element_number + rank_, indices_.size()); - return absl::Span(indices_.data() + rank_ * sparse_element_number, - rank_); -} - -void SparseIndexArray::Append(absl::Span index) { - CHECK_GT(rank_, 0); - CHECK_EQ(index.size(), rank_); - indices_.insert(indices_.end(), index.begin(), index.end()); -} - -void SparseIndexArray::Clear() { indices_.clear(); } - -void SparseIndexArray::Resize(int64 num_indices) { - CHECK_GT(rank_, 0); - indices_.resize(rank_ * num_indices); -} - -bool SparseIndexArray::Validate(const Shape& shape) const { - if (rank_ == 0 || rank_ != shape.rank()) { - return false; - } - int64 num_indices = index_count(); 
- if (num_indices > LayoutUtil::MaxSparseElements(shape.layout())) { - return false; - } - if (num_indices < 2) { - return true; - } - absl::Span last = At(0); - if (!IndexUtil::IndexInBounds(shape, last)) { - return false; - } - for (int64 n = 1; n < num_indices; ++n) { - absl::Span next = At(n); - if (!IndexUtil::IndexInBounds(shape, next)) { - return false; - } - if (IndexUtil::CompareIndices(last, next) >= 0) { - return false; - } - last = next; - } - return true; -} - -} // namespace xla diff --git a/tensorflow/compiler/xla/sparse_index_array.h b/tensorflow/compiler/xla/sparse_index_array.h deleted file mode 100644 index 0c25355467d..00000000000 --- a/tensorflow/compiler/xla/sparse_index_array.h +++ /dev/null @@ -1,176 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// Utility class for managing sparse array indices. - -#ifndef TENSORFLOW_COMPILER_XLA_SPARSE_INDEX_ARRAY_H_ -#define TENSORFLOW_COMPILER_XLA_SPARSE_INDEX_ARRAY_H_ - -#include - -#include "absl/container/inlined_vector.h" -#include "absl/types/span.h" -#include "tensorflow/compiler/xla/array2d.h" -#include "tensorflow/compiler/xla/index_util.h" -#include "tensorflow/compiler/xla/xla_data.pb.h" - -namespace xla { - -// Encapsulates the array of indices for a sparse array. A SparseIndexArray -// contain indices for up to `max_indices` elements of a sparse array. Each -// sparse index is an array of `rank` int64 value that gives the location of a -// value within a sparse array. Note that the dimensions of the array are not -// checked (except for the rank). To avoid confusion, we refer to the position -// of an index within a SparseIndexArray as a sparse index number. -class SparseIndexArray { - public: - SparseIndexArray(); - SparseIndexArray(const SparseIndexArray&) = default; - SparseIndexArray(SparseIndexArray&&) = default; - SparseIndexArray& operator=(const SparseIndexArray&) = default; - SparseIndexArray& operator=(SparseIndexArray&&) = default; - - // Constructs a SparseIndexArray that can hold up to `max_indices` sparse - // indices, with an initial contents obtained from the given array. The rank - // is taken from the minor dimension of the array. The major dimension of the - // array must not exceed `max_indices`. - SparseIndexArray(int64 max_indices, const Array2D& indices); - - // Like above, but the array is flattened. For example, the following are - // equivalent: - // - // SparseIndexArray(10, 3, - // Array2D{ - // {0, 1, 2}, - // {3, 4, 5}, - // {6, 7, 8}, - // {9, 10, 11}, - // }) - // - // SparseIndexArray(10, 3, - // {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}) - // - SparseIndexArray(int64 max_indices, int64 rank, - std::vector indices = {}); - SparseIndexArray(int64 max_indices, int64 rank, - absl::Span indices); - - // Returns the number of elements represented by the indices stored in the - // array. 
- int64 index_count() const; - - // Returns a slice that refers to the given sparse index number. The argument - // must be in the range [0, element_count()). - absl::Span At(int64 sparse_element_number) const; - absl::Span At(int64 sparse_element_number); - - // Adds the given index at the end of the array. The new size of the - // SparseIndexArray must not exceed `max_indices`. - void Append(absl::Span index); - - // Removes all indices from the array. - void Clear(); - - // Resizes the array to contain the given number of sparse indices. The new - // size must be smaller than `max_indices`. If the new size is larger than - // the old size, the value of the new indices is not specified. - void Resize(int64 num_indices); - - // Returns true iff all indices are unique and occur in sorted order, and are - // valid for the given shape. - bool Validate(const Shape& shape) const; - - int64 rank() const { return rank_; } - int64 max_indices() const { return max_indices_; } - - // Returns a pointer to the int64 array that holds the sparse indices. - absl::Span mutable_data() { return absl::MakeSpan(indices_); } - absl::Span data() const { return indices_; } - - // Sorts this sparse index array along with the set of corresponding values. - // The indices and values are sorted in the lexicographic order of the - // indices, from smallest to largest. - // - // For example: - // - // std::vector v{10.0, 11.0, 12.0}; - // SparseIndexArray a(10, 3, - // {{3, 4, 5}, - // {1, 2, 3}, - // {2, 3, 4}}); - // a.SortWithValues(&v); - // // Prints "11.0, 12.0, 10.0": - // std::cout << v[0] << ", " << v[1] << ", " << v[2] << std::endl; - // - template - void SortWithValues(absl::Span values); - - private: - std::vector indices_; - int64 rank_; - int64 max_indices_; -}; - -template -void SparseIndexArray::SortWithValues(absl::Span values) { - int64 num_elements = index_count(); - CHECK_EQ(values.size(), num_elements); - std::vector sort_order; - sort_order.reserve(num_elements); - for (int64 i = 0; i < num_elements; ++i) { - sort_order.push_back(i); - } - auto sort_order_less = [this](int64 lhs, int64 rhs) { - return IndexUtil::CompareIndices(At(lhs), At(rhs)) < 0; - }; - absl::c_sort(sort_order, sort_order_less); - - // Reorder the array elements according to sort_order. Work through the array - // and follow cycles so we can do the reorder in-place. - absl::InlinedVector saved_index(rank()); - for (int64 i = 0; i < num_elements; ++i) { - // sort_order[i] == -1 indicates the element has already been copied. - if (sort_order[i] < 0) { - continue; - } else if (i == sort_order[i]) { - // The element is already in sorted order. - sort_order[i] = -1; - continue; - } - - std::copy_n(At(i).begin(), rank(), saved_index.begin()); - NativeT saved_value = values[i]; - int64 j = i; - for (;;) { - if (sort_order[j] == i) { - std::copy_n(saved_index.begin(), rank(), At(j).begin()); - values[j] = saved_value; - sort_order[j] = -1; - break; - } - - std::copy_n(At(sort_order[j]).begin(), rank(), At(j).begin()); - values[j] = values[sort_order[j]]; - - int64 k = sort_order[j]; - sort_order[j] = -1; - j = k; - } - } -} - -} // namespace xla - -#endif // TENSORFLOW_COMPILER_XLA_SPARSE_INDEX_ARRAY_H_ diff --git a/tensorflow/compiler/xla/sparse_index_array_test.cc b/tensorflow/compiler/xla/sparse_index_array_test.cc deleted file mode 100644 index e54057c4007..00000000000 --- a/tensorflow/compiler/xla/sparse_index_array_test.cc +++ /dev/null @@ -1,43 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/compiler/xla/sparse_index_array.h" - -#include - -#include "tensorflow/compiler/xla/test.h" - -namespace xla { -namespace { - -TEST(SparseIndexArrayTest, Sort) { - SparseIndexArray a(10, 3); - a.Append({2, 3, 4}); - a.Append({3, 4, 5}); - a.Append({1, 2, 3}); - a.Append({5, 6, 7}); - a.Append({4, 5, 6}); - a.Append({6, 7, 8}); - std::vector values = { - 12.0, 13.0, 11.0, 15.0, 14.0, 16.0, - }; - a.SortWithValues(absl::MakeSpan(values)); - ASSERT_EQ(a.data(), std::vector({1, 2, 3, 2, 3, 4, 3, 4, 5, 4, 5, 6, 5, - 6, 7, 6, 7, 8})); - ASSERT_EQ(values, std::vector({11.0, 12.0, 13.0, 14.0, 15.0, 16.0})); -} - -} // namespace -} // namespace xla diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 0a0eaa190ee..b2cc8050c42 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -175,7 +175,7 @@ tf_cc_binary( "//tensorflow/compiler/xla/service/cpu:cpu_compiler", "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", "//tensorflow/core:lib", - "@llvm//:support", + "@llvm-project//llvm:support", ], ) @@ -255,7 +255,7 @@ cc_library( srcs = ["filecheck.cc"], hdrs = ["filecheck.h"], data = [ - "@llvm//:FileCheck", + "@llvm-project//llvm:FileCheck", ], deps = [ "//tensorflow/compiler/xla:statusor", @@ -2136,7 +2136,7 @@ tf_cc_test( "//tensorflow/core:test_main", "//tensorflow/stream_executor", "@com_google_absl//absl/memory", - "@llvm//:core", + "@llvm-project//llvm:core", ], ) diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.cc b/tensorflow/compiler/xla/tests/hlo_test_base.cc old mode 100644 new mode 100755 index 17e37607be1..07465885a69 --- a/tensorflow/compiler/xla/tests/hlo_test_base.cc +++ b/tensorflow/compiler/xla/tests/hlo_test_base.cc @@ -364,7 +364,6 @@ StatusOr<::testing::AssertionResult> HloTestBase::RunAndCompareInternal( instruction->set_raw_backend_config_string(backend_config); } - // return ::testing::AssertionSuccess(); auto output = test_runner_.Execute(std::move(module), fake_argument_ptrs, /*run_hlo_passes=*/run_hlo_passes, /*profile=*/profile); @@ -501,6 +500,19 @@ HloInstruction* HloTestBase::FindInstruction(HloModule* module, return nullptr; } +HloInstruction* HloTestBase::FindInstruction(HloModule* module, + HloOpcode opcode) { + for (const HloComputation* c : module->computations()) { + auto instructions = c->instructions(); + auto it = absl::c_find_if( + instructions, [&](HloInstruction* i) { return i->opcode() == opcode; }); + if (it != instructions.end()) { + return *it; + } + } + return nullptr; +} + Backend& HloTestBase::backend() { return test_runner_.backend(); } /* static */ diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.h b/tensorflow/compiler/xla/tests/hlo_test_base.h old mode 100644 new mode 100755 index 848b334cfec..45917f39b6c --- a/tensorflow/compiler/xla/tests/hlo_test_base.h +++ b/tensorflow/compiler/xla/tests/hlo_test_base.h @@ -274,6 
+274,8 @@ class HloTestBase : public ::testing::Test {
   // inspect a particular computation or instruction.
   HloComputation* FindComputation(HloModule* module, absl::string_view name);
   HloInstruction* FindInstruction(HloModule* module, absl::string_view name);
+  // Gets the instruction from the given module with the given opcode.
+  HloInstruction* FindInstruction(HloModule* module, HloOpcode opcode);
 
   // Return an HLO verifier constructed for the test backend.
   HloVerifier& verifier() const { return *hlo_verifier_; }
diff --git a/tensorflow/compiler/xla/tests/test_utils.cc b/tensorflow/compiler/xla/tests/test_utils.cc
index 4563d7e0df2..76488917257 100644
--- a/tensorflow/compiler/xla/tests/test_utils.cc
+++ b/tensorflow/compiler/xla/tests/test_utils.cc
@@ -218,6 +218,23 @@ void PopulateWithFloatingPointData(Literal* literal,
   }
 }
 
+// uniform_int_distribution is not defined for 8-bit integers.
+// Use 'short' for those types.
+template <typename IntT>
+struct RngT {
+  using type = IntT;
+};
+
+template <>
+struct RngT<int8> {
+  using type = int16;
+};
+
+template <>
+struct RngT<uint8> {
+  using type = uint16;
+};
+
 template <typename IntT>
 void PopulateWithRandomIntegralData(Literal* literal, std::minstd_rand0* engine,
                                     bool no_duplicates) {
@@ -230,7 +247,7 @@ void PopulateWithRandomIntegralData(Literal* literal, std::minstd_rand0* engine,
     std::shuffle(literal->data<IntT>().begin(), literal->data<IntT>().end(),
                  *engine);
   } else {
-    std::uniform_int_distribution<IntT> generator(
+    std::uniform_int_distribution<typename RngT<IntT>::type> generator(
        std::numeric_limits<IntT>::lowest(), std::numeric_limits<IntT>::max());
     for (IntT& value : literal->data<IntT>()) {
       value = generator(*engine);
@@ -324,9 +341,6 @@ StatusOr<Literal> MakeFakeLiteralInternal(const Shape& shape,
           }));
       break;
     }
-    // Token requires no data.
-    case TOKEN:
-      break;
     default:
       return Unimplemented("Unsupported type for fake literal generation: %s",
                            ShapeUtil::HumanString(shape));
@@ -341,7 +355,7 @@ void PopulateWithRandomIntegralDataWithBounds(Literal* literal,
   CHECK(engine != nullptr);
   CHECK_EQ(literal->shape().element_type(),
            primitive_util::NativeToPrimitiveType<IntT>());
-  std::uniform_int_distribution<IntT> generator(min, max);
+  std::uniform_int_distribution<typename RngT<IntT>::type> generator(min, max);
   for (IntT& value : literal->data<IntT>()) {
     value = generator(*engine);
   }
diff --git a/tensorflow/compiler/xla/tests/test_utils_test.cc b/tensorflow/compiler/xla/tests/test_utils_test.cc
index 9db08a5b72f..2a0d98ad1f1 100644
--- a/tensorflow/compiler/xla/tests/test_utils_test.cc
+++ b/tensorflow/compiler/xla/tests/test_utils_test.cc
@@ -56,24 +56,6 @@ XLA_TEST_F(TestUtilsTest, UnusedParam) {
   TF_ASSERT_OK(MakeFakeArguments(&module).status());
 }
 
-XLA_TEST_F(TestUtilsTest, Token) {
-  auto module = ParseAndReturnUnverifiedModule(
-      R"(HloModule outfeed_module
-
-    ENTRY InfeedToOutfeed {
-      token0 = token[] parameter(0)
-      infeed = ((u32[3]{0}, pred[]), token[]) infeed(token0)
-      infeed.data = (u32[3]{0}, pred[]) get-tuple-element(infeed), index=0
-      outfeed = token[] outfeed(infeed.data, token0)
-      ROOT infeed.1 = ((u32[3]{0}, pred[]), token[]) infeed(token0)
-      infeed.1.data = (u32[3]{0}, pred[]) get-tuple-element(infeed.1), index=0
-      infeed.1.token = token[] get-tuple-element(infeed.1), index=1
-      outfeed.1 = token[] outfeed(infeed.1.data, infeed.1.token)
-    })")
-                    .ValueOrDie();
-  TF_ASSERT_OK(MakeFakeArguments(module.get()).status());
-}
-
 XLA_TEST_F(TestUtilsTest, MultipleIndexSpacesForDynamicSlices) {
   auto module = ParseAndReturnVerifiedModule(
       R"(HloModule index_space_module
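The RngT indirection above works around the C++ standard, which does not define std::uniform_int_distribution for 8-bit integer types. A self-contained restatement of the same promotion trick using standard types:

#include <cstdint>
#include <limits>
#include <random>

// Distribute over int16_t, then narrow: the drawn value always fits in
// int8_t because the distribution bounds are int8_t's own limits.
int8_t RandomInt8(std::minstd_rand0* engine) {
  std::uniform_int_distribution<int16_t> dist(
      std::numeric_limits<int8_t>::lowest(),
      std::numeric_limits<int8_t>::max());
  return static_cast<int8_t>(dist(*engine));
}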
diff --git a/tensorflow/compiler/xla/tools/BUILD b/tensorflow/compiler/xla/tools/BUILD
index 603e94ca938..db819c308ce 100644
--- a/tensorflow/compiler/xla/tools/BUILD
+++ b/tensorflow/compiler/xla/tools/BUILD
@@ -206,6 +206,7 @@ tf_cc_test(
         ":hlo_extractor",
         "//tensorflow/compiler/xla/service:hlo_matchers",
         "//tensorflow/compiler/xla/tests:hlo_test_base",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
         "//tensorflow/core:test",
     ],
 )
diff --git a/tensorflow/compiler/xla/tools/driver.cc b/tensorflow/compiler/xla/tools/driver.cc
index 4b3ed2b58b7..8949843b67b 100644
--- a/tensorflow/compiler/xla/tools/driver.cc
+++ b/tensorflow/compiler/xla/tools/driver.cc
@@ -365,6 +365,9 @@ void Fill(void* buffer, ArrayShape shape) {
 }
 
 template <typename T>
+#if defined(MEMORY_SANITIZER)
+__attribute__((no_sanitize_memory))
+#endif
 void DisplayT(void* buffer, int num_elements) {
   T* casted = static_cast<T*>(buffer);
   for (int i = 0; i < num_elements; i++) {
diff --git a/tensorflow/compiler/xla/tools/hlo_module_loader.cc b/tensorflow/compiler/xla/tools/hlo_module_loader.cc
index 8eb170b25e5..0b16c877964 100644
--- a/tensorflow/compiler/xla/tools/hlo_module_loader.cc
+++ b/tensorflow/compiler/xla/tools/hlo_module_loader.cc
@@ -86,8 +86,8 @@ StatusOr<std::unique_ptr<HloModule>> LoadModuleFromData(
       return InvalidArgument("Failed to parse input as HLO protobuf binary");
     }
   } else if (format == "pbtxt") {
-    if (!proto2::TextFormat::ParseFromString(data, &proto) &&
-        !proto2::TextFormat::ParseFromString(data, proto.mutable_hlo())) {
+    if (!google::protobuf::TextFormat::ParseFromString(data, &proto) &&
+        !google::protobuf::TextFormat::ParseFromString(data, proto.mutable_hlo())) {
       return InvalidArgument("Failed to parse input as HLO protobuf text");
     }
   } else {
diff --git a/tensorflow/compiler/xla/tools/replay_computation.cc b/tensorflow/compiler/xla/tools/replay_computation.cc
index 095655085e5..639f91b8b53 100644
--- a/tensorflow/compiler/xla/tools/replay_computation.cc
+++ b/tensorflow/compiler/xla/tools/replay_computation.cc
@@ -349,7 +349,7 @@ StatusOr<std::vector<HloSnapshot>> ParseRecordIoFile(absl::string_view filename,
   tensorflow::tstring record;
   while (reader.ReadRecord(&offset, &record).ok()) {
     HloSnapshot snapshot;
-    if (snapshot.mutable_hlo()->ParseFromStringPiece(record)) {
+    if (snapshot.mutable_hlo()->ParseFromString(record)) {
       snapshots.push_back(std::move(snapshot));
     } else {
       LOG(ERROR) << "Encountered bad proto";
diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto
index b0b97f1eb45..5a3da69f9fc 100644
--- a/tensorflow/compiler/xla/xla_data.proto
+++ b/tensorflow/compiler/xla/xla_data.proto
@@ -115,9 +115,8 @@ enum Format {
   INVALID_FORMAT = 0;
   // The default layout, with exactly one storage location per element.
   DENSE = 1;
-  // A sparsely encoded layout, providing only the index/value pairs of non-zero
-  // elements.
-  SPARSE = 2;
+  reserved 2;
+  reserved "SPARSE";
 }
 
 // Describes a tile used in tiling-based layout. Refer to
@@ -156,10 +155,8 @@ message LayoutProto {
   reserved 3;
   reserved "padding_value";
 
-  // The maximum number of elements that can be stored for SPARSE formats. This
-  // can be used to determine the maximum size in bytes of arrays stored in
-  // memory. This field must be unset unless the format is SPARSE.
-  int64 max_sparse_elements = 5;
+  reserved 5;
+  reserved "max_sparse_elements";
 
   // A sequence of tiles, starting from the tile that's applied first to the
   // Shape.
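One note on the driver.cc hunk above: MemorySanitizer only tracks initialization performed by instrumented code, so the display path opts out of checking reads of buffers filled by an external mechanism such as a device-to-host copy. A minimal sketch of the same pattern under that assumption (illustrative, not the PR's exact code):

#if defined(MEMORY_SANITIZER)
__attribute__((no_sanitize_memory))
#endif
int ReadUntrackedByte(const unsigned char* externally_filled) {
  // Without the attribute, MSan would flag this read as using
  // uninitialized memory, since it never saw the external writer.
  return externally_filled[0];
}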
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 29cb438473a..fbdcb4d65c8 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -79,7 +79,6 @@ load( "tf_cc_tests", "tf_copts", "tf_cuda_library", - "tf_cuda_only_cc_test", "tf_features_nomodules_if_android", "tf_features_nomodules_if_emscripten", "tf_gen_op_libs", @@ -89,15 +88,29 @@ load( "tf_opts_nortti_if_emscripten", "transitive_hdrs", ) + +# buildifier: disable=same-origin-load load("//tensorflow:tensorflow.bzl", "if_nccl") + +# buildifier: disable=same-origin-load load("//tensorflow:tensorflow.bzl", "tensorflow_opensource_extra_deps") +# buildifier: disable=same-origin-load # load("//tensorflow:tensorflow.bzl", "tf_android_full_lite_protos") + +# buildifier: disable=same-origin-load load("//tensorflow:tensorflow.bzl", "tf_cc_test_gpu") + +# buildifier: disable=same-origin-load load("//tensorflow:tensorflow.bzl", "tf_cc_tests_gpu") + +# buildifier: disable=same-origin-load load("//tensorflow:tensorflow.bzl", "tf_cuda_cc_test") +# buildifier: disable=same-origin-load # Placeholder: load("//tensorflow:tensorflow.bzl", "tf_portable_proto_lib") + +# buildifier: disable=same-origin-load load("//tensorflow:tensorflow.bzl", "tf_portable_proto_library") # For platform specific build config @@ -311,20 +324,15 @@ cc_library( "//tensorflow/core/platform:threadpool_interface", "//tensorflow/core/platform:threadpool_options", "//tensorflow/core/platform:types", - "//tensorflow/core/platform/default/build_config:base", "@com_google_absl//absl/base", "@com_google_absl//absl/strings", ], ) -cc_library( +alias( name = "framework_bounds_check", - hdrs = ["//tensorflow/core/framework:bounds_check.h"], + actual = "//tensorflow/core/framework:bounds_check", visibility = ["//tensorflow/core/kernels:friends"], - deps = [ - ":platform_base", - "//third_party/eigen3", - ], ) filegroup( @@ -492,7 +500,7 @@ cc_library( "//tensorflow/core/lib/monitoring:legacy_lib_monitoring_lib_headers", "//tensorflow/core/lib/random:legacy_lib_random_headers", "//tensorflow/core/lib/strings:legacy_lib_string_headers", - "//tensorflow/core/util:gpu_cuda_alias.h", + "//tensorflow/core/util:lib_hdrs", ], visibility = ["//visibility:public"], deps = [ @@ -556,7 +564,7 @@ cc_library( "//tensorflow/core/lib/core:legacy_lib_core_status_test_util_header", "//tensorflow/core/platform:test.h", "//tensorflow/core/platform:test_benchmark.h", - "//tensorflow/core/util:reporter.h", + "//tensorflow/core/util:test_hdrs", ], copts = tf_copts(), linkopts = select({ @@ -644,46 +652,15 @@ tf_cuda_library( "//tensorflow/core/framework:variant_op_registry.h", "//tensorflow/core/framework:variant_tensor_data.h", "//tensorflow/core/util/sparse:framework_group", - "//tensorflow/core/util:activation_mode.h", - "//tensorflow/core/util:batch_util.h", - "//tensorflow/core/util:bcast.h", - "//tensorflow/core/util:debug_events_writer.h", - "//tensorflow/core/util:device_name_utils.h", - "//tensorflow/core/util:dump_graph.h", - "//tensorflow/core/util:einsum_op_util.h", - "//tensorflow/core/util:events_writer.h", - "//tensorflow/core/util:example_proto_fast_parsing.h", - "//tensorflow/core/util:example_proto_helper.h", - "//tensorflow/core/util:gpu_kernel_helper.h", - "//tensorflow/core/util:guarded_philox_random.h", - "//tensorflow/core/util:matmul_autotune.h", - "//tensorflow/core/util:matmul_bcast.h", - "//tensorflow/core/util:mirror_pad_mode.h", - "//tensorflow/core/util:padding.h", - "//tensorflow/core/util:port.h", - "//tensorflow/core/util:ptr_util.h", - 
"//tensorflow/core/util:reffed_status_callback.h", - "//tensorflow/core/util:saved_tensor_slice_util.h", - "//tensorflow/core/util:stat_summarizer.h", - "//tensorflow/core/util:stat_summarizer_options.h", - "//tensorflow/core/util:stream_executor_util.h", - "//tensorflow/core/util:strided_slice_op.h", - "//tensorflow/core/util:tensor_format.h", - "//tensorflow/core/util:tensor_ops_util.h", - "//tensorflow/core/util:tensor_slice_reader.h", - "//tensorflow/core/util:tensor_slice_reader_cache.h", - "//tensorflow/core/util:tensor_slice_writer.h", - "//tensorflow/core/util:use_cudnn.h", - "//tensorflow/core/util:util.h", - "//tensorflow/core/util:work_sharder.h", - "public/version.h", + "//tensorflow/core/util:framework_srcs", + "//tensorflow/core/public:version.h", ] + select({ "//tensorflow:windows": [], "//conditions:default": [ "//tensorflow/core/util:memmapped_file_system_hdrs", ], }) + if_mkl([ - "//tensorflow/core/util:mkl_util.h", + "//tensorflow/core/util:mkl_util_hdrs", ]), visibility = ["//visibility:public"], deps = [ @@ -706,24 +683,19 @@ alias( visibility = ["//visibility:public"], ) -cc_library( +alias( name = "overflow", - hdrs = ["//tensorflow/core/util:overflow.h"], - deps = [ - ":framework_lite", - ":lib", - ], + actual = "//tensorflow/core/util:overflow", ) -cc_library( +alias( name = "exec_on_stall", - hdrs = ["//tensorflow/core/util:exec_on_stall.h"], - deps = [":framework_lite"], + actual = "//tensorflow/core/util:exec_on_stall", ) -cc_library( +alias( name = "ptr_util", - hdrs = ["//tensorflow/core/util:ptr_util.h"], + actual = "//tensorflow/core/util:ptr_util", ) # TODO(gonnet): Remove this alias once all users have been moved to the actual target. @@ -742,7 +714,7 @@ alias( cc_library( name = "session_options", - hdrs = ["public/session_options.h"], + hdrs = ["//tensorflow/core/public:session_options.h"], visibility = ["//visibility:public"], deps = [ ":lib", @@ -1169,8 +1141,8 @@ tf_cuda_library( "graph/node_builder.h", "graph/validate.h", "graph/while_context.h", - "public/session.h", - "public/session_options.h", + "//tensorflow/core/public:session.h", + "//tensorflow/core/public:session_options.h", ], visibility = ["//visibility:public"], deps = [ @@ -1295,7 +1267,9 @@ cc_library( cc_library( name = "dynamic_kernels_impl", visibility = [":__subpackages__"], - deps = [], + deps = [ + "//tensorflow/core/kernels:sobol_op", + ], ) cc_library( @@ -1471,8 +1445,9 @@ filegroup( "//tensorflow/core/lib/random:legacy_lib_random_all_srcs", "//tensorflow/core/lib/strings:legacy_lib_strings_all_headers", "//tensorflow/core/lib/strings:legacy_lib_strings_all_srcs", - "//tensorflow/core/platform/default/build_config:android_srcs", + "//tensorflow/core/platform:legacy_mobile_srcs", "//tensorflow/core/profiler:mobile_srcs", + "//tensorflow/core/public:mobile_srcs_no_runtime", "//tensorflow/core/util/ctc:android_srcs", "//tensorflow/core/util/sparse:mobile_srcs_no_runtime_group", "//tensorflow/core/util:mobile_srcs_no_runtime", @@ -1481,7 +1456,6 @@ filegroup( "client/**/*.cc", "lib/**/*.h", "lib/**/*.cc", - "public/**/*.h", ], exclude = [ "**/*test.*", @@ -1748,8 +1722,7 @@ filegroup( "//tensorflow/core/framework:android_test_hdrs", "//tensorflow/core/framework:android_test_srcs", "//tensorflow/core/platform:test.h", - "//tensorflow/core/util:reporter.cc", - "//tensorflow/core/util:reporter.h", + "//tensorflow/core/util:android_test_srcs", ], visibility = ["//visibility:public"], ) @@ -1761,8 +1734,7 @@ filegroup( "//tensorflow/core/framework:android_test_hdrs", 
"//tensorflow/core/framework:android_test_srcs_no_core", "//tensorflow/core/platform:test.h", - "//tensorflow/core/util:reporter.cc", - "//tensorflow/core/util:reporter.h", + "//tensorflow/core/util:android_test_srcs", ], visibility = ["//visibility:public"], ) @@ -1774,6 +1746,7 @@ cc_library( srcs = if_android([":android_test_srcs"]), hdrs = [ "//tensorflow/core/framework:android_test_hdrs", + "//tensorflow/core/util:android_test_hdrs", ], copts = tf_copts(android_optimization_level_override = None), tags = [ @@ -1783,7 +1756,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":android_tensorflow_lib", - ":protos_cc", + ":protos_all_cc", "//tensorflow/core/platform/default/build_config:gtest", "//third_party/eigen3", ], @@ -2012,7 +1985,7 @@ LIB_INTERNAL_PUBLIC_HEADERS = [ "//tensorflow/core/platform:tracing.h", "//tensorflow/core/platform:unbounded_work_queue.h", "//tensorflow/core/platform:legacy_platform_lib_hdrs", - "//tensorflow/core/util:env_var.h", + "//tensorflow/core/util:lib_internal_public_hdrs", ] cc_library( @@ -2276,7 +2249,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":platform_base", - "//tensorflow/core/platform/default/build_config:logging", + "//tensorflow/core/platform:logging", ], ) @@ -2309,8 +2282,8 @@ cc_library( ":core_stringpiece", "//tensorflow/core/platform:dynamic_annotations", "//tensorflow/core/platform:jpeg", + "//tensorflow/core/platform:logging", "//tensorflow/core/platform:stringpiece", - "//tensorflow/core/platform/default/build_config:logging", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/strings", ], @@ -2344,10 +2317,10 @@ cc_library( "//tensorflow/core/lib/strings:strcat", "//tensorflow/core/platform:dynamic_annotations", "//tensorflow/core/platform:gif", + "//tensorflow/core/platform:logging", "//tensorflow/core/platform:numbers", "//tensorflow/core/platform:strcat", "//tensorflow/core/platform:stringpiece", - "//tensorflow/core/platform/default/build_config:logging", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/strings", ], @@ -2402,11 +2375,9 @@ alias( actual = "//tensorflow/core/lib/core:error_codes_proto_cc", ) -cc_library( +alias( name = "version_lib", - srcs = ["//tensorflow/core/util:version_info.cc"], - hdrs = ["public/version.h"], - copts = tf_copts(), + actual = "//tensorflow/core/util:version_info", ) FRAMEWORK_INTERNAL_PRIVATE_HEADERS = [ @@ -2551,6 +2522,9 @@ tf_cuda_library( "//tensorflow/core/framework:attr_value_proto_text", "//tensorflow/core/framework:bfloat16", "//tensorflow/core/framework:numeric_types", + "//tensorflow/core/framework:resource_handle", + "//tensorflow/core/framework:tensor", + "//tensorflow/core/framework:tensor_shape", "//tensorflow/core/kernels:bounds_check", "//tensorflow/core/platform/default/build_config:platformlib", "//tensorflow/core/profiler/internal:annotation_stack_impl", @@ -2558,6 +2532,7 @@ tf_cuda_library( "//tensorflow/core/profiler/lib:traceme", "//tensorflow/core/util:port", "//tensorflow/core/util:stats_calculator_portable", + "//tensorflow/compiler/jit:common", ] + if_static( extra_deps = ["@com_google_protobuf//:protobuf"], otherwise = ["@com_google_protobuf//:protobuf_headers"], @@ -2621,20 +2596,10 @@ cc_library( ], ) -tf_cuda_library( +alias( name = "cuda_device_functions", - hdrs = [ - "//tensorflow/core/util:gpu_device_functions.h", - ], + actual = "//tensorflow/core/util:gpu_device_functions", visibility = ["//visibility:public"], - deps = [":framework_lite"], -) - -# TODO(josh11b): Is this needed, or can we 
just use ":protos_all_cc"? -cc_library( - name = "protos_cc", - visibility = ["//visibility:public"], - deps = ["//tensorflow/core/platform/default/build_config:protos_cc"], ) # Library containing all of the graph construction code that is @@ -2713,7 +2678,7 @@ CORE_CPU_BASE_HDRS = GRAPH_HDRS + [ tf_cuda_library( name = "core_cpu_base", - hdrs = CORE_CPU_BASE_HDRS + ["public/session.h"], + hdrs = CORE_CPU_BASE_HDRS + ["//tensorflow/core/public:session.h"], copts = tf_copts(), deps = [":core_cpu_base_no_ops"] + if_static([ ":function_ops_op_lib", @@ -2735,10 +2700,10 @@ tf_cuda_library( "common_runtime/graph_optimizer.h", "graph/graph_constructor.cc", # Depends on common_runtime. "graph/graph_def_builder_util.cc", # Depends on common_runtime. - "public/session_options.h", - "public/version.h", + "//tensorflow/core/public:session_options.h", + "//tensorflow/core/public:version.h", ] + CORE_CPU_BASE_HDRS, - hdrs = CORE_CPU_BASE_HDRS + ["public/session.h"], + hdrs = CORE_CPU_BASE_HDRS + ["//tensorflow/core/public:session.h"], copts = tf_copts(), deps = [ ":graph", @@ -2880,9 +2845,9 @@ tf_cuda_library( "graph/mkl_layout_pass.cc", "graph/mkl_tfconversion_pass.cc", "graph/quantize_training.cc", - "public/session.h", - "public/session_options.h", - "public/version.h", + "//tensorflow/core/public:session.h", + "//tensorflow/core/public:session_options.h", + "//tensorflow/core/public:version.h", ], hdrs = CORE_CPU_LIB_HEADERS, copts = tf_copts() + tf_openmp_copts(), @@ -3006,7 +2971,7 @@ tf_cuda_library( srcs = ["common_runtime/direct_session.cc"], hdrs = [ "common_runtime/direct_session.h", - "//tensorflow/core/util:env_var.h", + "//tensorflow/core/util:lib_internal_public_hdrs", ], copts = tf_copts(), deps = [ @@ -3513,30 +3478,6 @@ tf_cc_test( ], ) -tf_cc_test( - name = "util_overflow_test", - size = "small", - srcs = ["//tensorflow/core/util:overflow_test.cc"], - deps = [ - ":framework_lite", - ":overflow", - ":test", - ":test_main", - ], -) - -tf_cc_test( - name = "exec_on_stall_test", - size = "small", - srcs = ["//tensorflow/core/util:exec_on_stall_test.cc"], - deps = [ - ":exec_on_stall", - ":framework_lite", - ":test", - ":test_main", - ], -) - tf_cc_test( name = "lib_jpeg_jpeg_mem_unittest", srcs = ["lib/jpeg/jpeg_mem_unittest.cc"], @@ -3628,6 +3569,7 @@ test_suite( tests = [ ":core_higher_level_tests", "//tensorflow/core/framework:higher_level_tests", + "//tensorflow/core/util:higher_level_tests", ], ) @@ -3660,29 +3602,6 @@ tf_cc_tests( "graph/subgraph_test.cc", "graph/tensor_id_test.cc", "graph/validate_test.cc", - "//tensorflow/core/util:bcast_test.cc", - "//tensorflow/core/util:command_line_flags_test.cc", - "//tensorflow/core/util:debug_events_writer_test.cc", - "//tensorflow/core/util:device_name_utils_test.cc", - "//tensorflow/core/util:dump_graph_test.cc", - "//tensorflow/core/util:equal_graph_def_test.cc", - "//tensorflow/core/util:events_writer_test.cc", - "//tensorflow/core/util:example_proto_fast_parsing_test.cc", - "//tensorflow/core/util:example_proto_helper_test.cc", - "//tensorflow/core/util:matmul_bcast_test.cc", - "//tensorflow/core/util:memmapped_file_system_test.cc", - "//tensorflow/core/util:presized_cuckoo_map_test.cc", - "//tensorflow/core/util:reffed_status_callback_test.cc", - "//tensorflow/core/util:reporter_test.cc", - "//tensorflow/core/util:saved_tensor_slice_util_test.cc", - "//tensorflow/core/util:semver_test.cc", - "//tensorflow/core/util:stat_summarizer_test.cc", - "//tensorflow/core/util:tensor_format_test.cc", - 
"//tensorflow/core/util:tensor_slice_reader_test.cc", - "//tensorflow/core/util:tensor_slice_set_test.cc", - "//tensorflow/core/util:tensor_slice_util_test.cc", - "//tensorflow/core/util:tensor_slice_writer_test.cc", - "//tensorflow/core/util:work_sharder_test.cc", "//tensorflow/core/util/sparse:higher_level_tests_group", ], create_named_test_suite = True, @@ -3910,7 +3829,7 @@ tf_cc_test_mkl( srcs = [ "graph/mkl_layout_pass_test.cc", "graph/mkl_tfconversion_pass_test.cc", - "//tensorflow/core/util:mkl_util_test.cc", + "//tensorflow/core/util:mkl_util_test_srcs", ], linkstatic = 1, deps = [ @@ -4063,18 +3982,6 @@ tf_cc_test_gpu( ], ) -tf_cuda_only_cc_test( - name = "util_gpu_kernel_helper_test", - srcs = [ - "//tensorflow/core/util:gpu_kernel_helper_test.cu.cc", - ], - deps = [ - ":test", - ":test_main", - "//third_party/eigen3", - ] + mkl_deps(), -) - tf_cc_test_gpu( name = "memory_types_test", size = "small", diff --git a/tensorflow/core/api_def/base_api/api_def_Asin.pbtxt b/tensorflow/core/api_def/base_api/api_def_Asin.pbtxt index 16531612fdf..1d5b62703ce 100644 --- a/tensorflow/core/api_def/base_api/api_def_Asin.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_Asin.pbtxt @@ -5,7 +5,7 @@ op { The `tf.math.asin` operation returns the inverse of `tf.math.sin`, such that if `y = tf.math.sin(x)` then, `x = tf.math.asin(y)`. -**Note**: The output of `tf.math.asin` will lie within the invertible range +**Note**: The output of `tf.math.asin` will lie within the invertible range of sine, i.e [-pi/2, pi/2]. For example: diff --git a/tensorflow/core/api_def/base_api/api_def_Atan.pbtxt b/tensorflow/core/api_def/base_api/api_def_Atan.pbtxt index 65ce42cb942..8ab19b7515a 100644 --- a/tensorflow/core/api_def/base_api/api_def_Atan.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_Atan.pbtxt @@ -5,7 +5,7 @@ op { The `tf.math.atan` operation returns the inverse of `tf.math.tan`, such that if `y = tf.math.tan(x)` then, `x = tf.math.atan(y)`. -**Note**: The output of `tf.math.atan` will lie within the invertible range +**Note**: The output of `tf.math.atan` will lie within the invertible range of tan, i.e (-pi/2, pi/2). For example: diff --git a/tensorflow/core/api_def/base_api/api_def_AudioSpectrogram.pbtxt b/tensorflow/core/api_def/base_api/api_def_AudioSpectrogram.pbtxt index 172696395ba..8af18098574 100644 --- a/tensorflow/core/api_def/base_api/api_def_AudioSpectrogram.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_AudioSpectrogram.pbtxt @@ -43,8 +43,8 @@ This op expects to receive audio data as an input, stored as floats in the range -1 to 1, together with a window width in samples, and a stride specifying how far to move the window between slices. From this it generates a three dimensional output. The first dimension is for the channels in the input, so a -stereo audio input would have two here for example. The second dimension is time, -with successive frequency slices. The third dimension has an amplitude value for +stereo audio input would have two here for example. The second dimension is time, +with successive frequency slices. The third dimension has an amplitude value for each frequency during that time slice. 
This means the layout when converted and saved as an image is rotated 90 degrees diff --git a/tensorflow/core/api_def/base_api/api_def_BlockLSTMV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_BlockLSTMV2.pbtxt index 4da9ebaf863..936099e70af 100644 --- a/tensorflow/core/api_def/base_api/api_def_BlockLSTMV2.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_BlockLSTMV2.pbtxt @@ -129,7 +129,7 @@ for x1 in unpack(x): h.append(h1) return pack(i), pack(cs), pack(f), pack(o), pack(ci), pack(ch), pack(h) -Note that unlike LSTMBlockCell (and BlockLSTM) which uses ICFO gate layout, +Note that unlike LSTMBlockCell (and BlockLSTM) which uses ICFO gate layout, this op uses IFCO. So in order for the following snippet to be equivalent all gate-related outputs should be reordered. ``` diff --git a/tensorflow/core/api_def/base_api/api_def_BoostedTreesCalculateBestFeatureSplitV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_BoostedTreesCalculateBestFeatureSplitV2.pbtxt new file mode 100644 index 00000000000..2bbaba26257 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_BoostedTreesCalculateBestFeatureSplitV2.pbtxt @@ -0,0 +1,124 @@ +op { + graph_op_name: "BoostedTreesCalculateBestFeatureSplitV2" + visibility: HIDDEN + in_arg { + name: "node_id_range" + description: <bik`), the contracted axis label is `j`. - (e) Expand Diagonal: If the output subcripts contain repeated (explicit) axis + (e) Expand Diagonal: If the output subscripts contain repeated (explicit) axis labels, the opposite operation of (a) is applied. For example, in the equation `i->iii`, and input shape `[3]`, the output of shape `[3, 3, 3]` are all zeros, except for the (generalized) diagonal which is populated @@ -70,7 +70,7 @@ Operations are applied to the input(s) according to the following rules: Note: This operation is not supported by `np.einsum` or `tf.einsum`; it is provided to enable computing the symbolic gradient of `tf.einsum`. -The output subcripts must contain only labels appearing in at least one of the +The output subscripts must contain only labels appearing in at least one of the input subscripts. Furthermore, all dimensions mapping to the same axis label must be equal. @@ -82,7 +82,7 @@ according to standard NumPy broadcasting The broadcasted dimensions are placed in the corresponding location of the ellipsis in the output subscript. If the broadcasted dimensions are non-empty -and the output subcripts do not contain ellipsis, then an InvalidArgument error +and the output subscripts do not contain ellipsis, then an InvalidArgument error is raised. 
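As a concrete sketch of the broadcasting rule above, using the public `tf.einsum` wrapper (assumed here to dispatch to this op):

```python
import tensorflow as tf

# Ellipsis broadcasting: the [5, 1] and [4] batch dimensions broadcast
# to [5, 4] and land where the ellipsis appears in the output subscript.
a = tf.random.normal([5, 1, 2, 3])
b = tf.random.normal([4, 3, 7])
c = tf.einsum('...ij,...jk->...ik', a, b)
print(c.shape)  # (5, 4, 2, 7)
```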
@compatibility(numpy) diff --git a/tensorflow/core/api_def/base_api/api_def_LeftShift.pbtxt b/tensorflow/core/api_def/base_api/api_def_LeftShift.pbtxt index 3855c5095a7..b7bf38535a2 100644 --- a/tensorflow/core/api_def/base_api/api_def_LeftShift.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_LeftShift.pbtxt @@ -16,9 +16,9 @@ dtype_list = [tf.int8, tf.int16, tf.int32, tf.int64] for dtype in dtype_list: lhs = tf.constant([-1, -5, -3, -14], dtype=dtype) rhs = tf.constant([5, 0, 7, 11], dtype=dtype) - + left_shift_result = bitwise_ops.left_shift(lhs, rhs) - + print(left_shift_result) # This will print: diff --git a/tensorflow/core/api_def/base_api/api_def_LowerBound.pbtxt b/tensorflow/core/api_def/base_api/api_def_LowerBound.pbtxt index 5ce825ae043..c2b0405c93d 100644 --- a/tensorflow/core/api_def/base_api/api_def_LowerBound.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_LowerBound.pbtxt @@ -28,7 +28,7 @@ Each set of rows with the same index in (sorted_inputs, values) is treated independently. The resulting row is the equivalent of calling `np.searchsorted(sorted_inputs, values, side='left')`. -The result is not a global index to the entire +The result is not a global index to the entire `Tensor`, but rather just the index in the last dimension. A 2-D example: diff --git a/tensorflow/core/api_def/base_api/api_def_MatrixSolveLs.pbtxt b/tensorflow/core/api_def/base_api/api_def_MatrixSolveLs.pbtxt index e667c328ae5..4fc86807200 100644 --- a/tensorflow/core/api_def/base_api/api_def_MatrixSolveLs.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_MatrixSolveLs.pbtxt @@ -49,7 +49,7 @@ in the batch: If `fast` is `True`, then the solution is computed by solving the normal equations using Cholesky decomposition. Specifically, if \\(m \ge n\\) then \\(X = (A^H A + \lambda I)^{-1} A^H B\\), which solves the least-squares -problem \\(X = \mathrm{argmin}_{Z \in \Re^{n \times k} } ||A Z - B||_F^2 + \lambda ||Z||_F^2\\). +problem \\(X = \mathrm{argmin}_{Z \in \Re^{n \times k} } ||A Z - B||_F^2 + \lambda ||Z||_F^2\\). If \\(m \lt n\\) then `output` is computed as \\(X = A^H (A A^H + \lambda I)^{-1} B\\), which (for \\(\lambda = 0\\)) is the minimum-norm solution to the under-determined linear system, i.e. diff --git a/tensorflow/core/api_def/base_api/api_def_MatrixSquareRoot.pbtxt b/tensorflow/core/api_def/base_api/api_def_MatrixSquareRoot.pbtxt index a9f1e593ccb..1e1a80e7648 100644 --- a/tensorflow/core/api_def/base_api/api_def_MatrixSquareRoot.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_MatrixSquareRoot.pbtxt @@ -24,10 +24,10 @@ The input matrix should be invertible. If the input matrix is real, it should have no eigenvalues which are real and negative (pairs of complex conjugate eigenvalues are allowed). -The matrix square root is computed by first reducing the matrix to -quasi-triangular form with the real Schur decomposition. The square root -of the quasi-triangular matrix is then computed directly. Details of -the algorithm can be found in: Nicholas J. Higham, "Computing real +The matrix square root is computed by first reducing the matrix to +quasi-triangular form with the real Schur decomposition. The square root +of the quasi-triangular matrix is then computed directly. Details of +the algorithm can be found in: Nicholas J. Higham, "Computing real square roots of a real matrix", Linear Algebra Appl., 1987. 
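A minimal numerical check of the property this op computes, assuming the public wrapper `tf.linalg.sqrtm` maps to it:

```python
import tensorflow as tf

# Build a symmetric positive definite (hence invertible) matrix, which
# has no negative real eigenvalues, as the description above requires.
x = tf.random.normal([4, 4], dtype=tf.float64)
a = tf.matmul(x, x, transpose_b=True) + 4.0 * tf.eye(4, dtype=tf.float64)
s = tf.linalg.sqrtm(a)
# sqrtm(A) @ sqrtm(A) should recover A up to floating-point error.
print(tf.reduce_max(tf.abs(tf.matmul(s, s) - a)).numpy())
```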
The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions diff --git a/tensorflow/core/api_def/base_api/api_def_ParallelInterleaveDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_ParallelInterleaveDataset.pbtxt index 939c64fe925..e30395cbfd3 100644 --- a/tensorflow/core/api_def/base_api/api_def_ParallelInterleaveDataset.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ParallelInterleaveDataset.pbtxt @@ -11,14 +11,14 @@ END name: "other_arguments" description: <